Repo restructuring

author: William R. Otte <wotte@dre.vanderbilt.edu> 2006-07-24 15:50:30 +0000
committer: William R. Otte <wotte@dre.vanderbilt.edu> 2006-07-24 15:50:30 +0000
commit: c44379cc7d9c7aa113989237ab0f56db12aa5219 (patch)
tree: 66a84b20d47f2269d8bdc6e0323f338763424d3a /ACE/examples/Web_Crawler
parent: 3aff90f4a822fcf5d902bbfbcc9fa931d6191a8c (diff)
download: ATCD-c44379cc7d9c7aa113989237ab0f56db12aa5219.tar.gz
27 files changed, 3362 insertions, 0 deletions
diff --git a/ACE/examples/Web_Crawler/.cvsignore b/ACE/examples/Web_Crawler/.cvsignore
new file mode 100644
index 00000000000..ba2906d0666
--- /dev/null
+++ b/ACE/examples/Web_Crawler/.cvsignore
@@ -0,0 +1 @@
+main
diff --git a/ACE/examples/Web_Crawler/Command_Processor.cpp b/ACE/examples/Web_Crawler/Command_Processor.cpp
new file mode 100644
index 00000000000..83289095444
--- /dev/null
+++ b/ACE/examples/Web_Crawler/Command_Processor.cpp
@@ -0,0 +1,128 @@
+// $Id$
+
+#include "ace/OS_NS_string.h"
+#include "URL.h"
+#include "HTTP_URL.h"
+#include "Options.h"
+#include "Command_Processor.h"
+#include "URL_Visitor.h"
+
+ACE_RCSID(Web_Crawler, Command_Processor, "$Id$")
+
+// Out-of-line definition of the virtual destructor.  <Command>
+// declares no data members, so there is nothing to release here.
+Command::~Command ()
+{
+}
+
+// Remember the <url> this command will operate on.  Only the pointer
+// is stored; the URL itself is not copied.
+URL_Command::URL_Command (URL *url) : url_ (url)
+{
+}
+
+// Visit the URL unless its path contains one of the substrings we do
+// not crawl.  Returns 0 when the URL is skipped or when the visitor
+// accepts it, and -1 when the visitor reports a failure.
+int
+URL_Command::execute (void)
+{
+  // Substrings that mark a path as not worth visiting.
+  static const char *const skipped_patterns[] =
+    {
+      "news:", ".cgi", "mailto", ".gif", ".pdf", ".map", ".bmp", ".jpg"
+    };
+  const size_t pattern_count =
+    sizeof (skipped_patterns) / sizeof (skipped_patterns[0]);
+
+  const ACE_CString path
+    (ACE_TEXT_ALWAYS_CHAR (this->url_->url_addr ().get_path_name ()));
+
+  for (size_t i = 0; i < pattern_count; ++i)
+    if (path.find (skipped_patterns[i]) != ACE_CString::npos)
+      return 0;
+
+  // Hand the URL to the visitor configured in the options.
+  if (this->url_->accept (OPTIONS::instance ()->visitor ()) == 0)
+    return 0;
+
+  ACE_DEBUG ((LM_DEBUG,
+              "Coudnt accept url\n"));
+  return -1;
+}
+
+// Self-deletion hook: frees this command and returns 0.  The object
+// must not be used once this call has returned.
+int
+URL_Command::destroy ()
+{
+  delete this;
+  return 0;
+}
+// Nothing to set up explicitly; the command queue member is
+// default-constructed.
+Command_Processor::Command_Processor ()
+{
+}
+
+// Empty destructor body.  Commands still sitting in the queue are not
+// destroyed here.
+Command_Processor::~Command_Processor ()
+{
+}
+
+// Delete this <Command_Processor> and return 0.  This is the way to
+// dispose of an instance, since the destructor is private.  The
+// object must not be used once this call has returned.
+int
+Command_Processor::destroy (void)
+{
+  delete this;
+  return 0;
+}
+
+// Drain the queue, executing each command in turn.  Every dequeued
+// command is destroyed once it has been executed.  Returns 0 when the
+// queue has been emptied and -1 as soon as a command cannot be
+// dequeued, is not a <URL_Command>, or fails to execute; commands
+// still queued at that point stay in the queue.
+int
+Command_Processor::execute (void)
+{
+  Command *command = 0;
+  while (this->url_queue_.is_empty () != 1)
+    {
+      if (this->url_queue_.dequeue_head (command) != 0)
+        ACE_ERROR_RETURN ((LM_ERROR,
+                           "%p\n", "dequeue_head"),
+                          -1);
+      URL_Command *url_command = dynamic_cast<URL_Command *> (command);
+      if (url_command == 0)
+        {
+          // The entry is null or is some other kind of <Command>.
+          // Dispose of it through the base interface instead of
+          // dereferencing a null pointer below.
+          if (command != 0)
+            command->destroy ();
+          ACE_ERROR_RETURN ((LM_ERROR,
+                             "Command is not a URL_Command\n"),
+                            -1);
+        }
+      // Guarantees <destroy> is called on the command when this
+      // iteration ends, whichever way it ends.
+      Auto_Destroyer<URL_Command> url_command_ptr (url_command);
+      if (url_command_ptr->execute () != 0)
+        ACE_ERROR_RETURN ((LM_ERROR,
+                           "%p\n", "Couldnt execute command"),
+                          -1);
+    }
+  return 0;
+}
+
+// Queue <command> according to the configured ordering: "FIFO"
+// appends it at the tail, "LIFO" pushes it at the head.  Returns -1
+// only when the enqueue operation itself fails.  A full queue or an
+// unrecognized ordering leaves the command unqueued and still
+// returns 0.
+int
+Command_Processor::insert (Command *command)
+{
+  if (this->url_queue_.is_full () == 1)
+    return 0;
+
+  const bool fifo =
+    ACE_OS::strcmp (OPTIONS::instance ()->order (), ACE_TEXT ("FIFO")) == 0;
+  if (fifo && this->url_queue_.enqueue_tail (command) != 0)
+    ACE_ERROR_RETURN ((LM_ERROR,
+                       ACE_TEXT ("%p\n"), ACE_TEXT ("enqueue_tail")),
+                      -1);
+
+  const bool lifo =
+    ACE_OS::strcmp (OPTIONS::instance ()->order (), ACE_TEXT ("LIFO")) == 0;
+  if (lifo && this->url_queue_.enqueue_head (command) != 0)
+    ACE_ERROR_RETURN ((LM_ERROR,
+                       ACE_TEXT ("%p\n"), ACE_TEXT ("enqueue_head")),
+                      -1);
+
+  return 0;
+}
+
+// Explicitly instantiate the static <singleton_> member of the
+// <Options> singleton on configurations that define
+// ACE_HAS_EXPLICIT_STATIC_TEMPLATE_MEMBER_INSTANTIATION.
+#if defined (ACE_HAS_EXPLICIT_STATIC_TEMPLATE_MEMBER_INSTANTIATION)
+template ACE_Singleton<Options, ACE_Null_Mutex> *ACE_Singleton<Options, ACE_Null_Mutex>::singleton_;
+#endif /* ACE_HAS_EXPLICIT_STATIC_TEMPLATE_MEMBER_INSTANTIATION */
diff --git a/ACE/examples/Web_Crawler/Command_Processor.h b/ACE/examples/Web_Crawler/Command_Processor.h
new file mode 100644
index 00000000000..742a316804c
--- /dev/null
+++ b/ACE/examples/Web_Crawler/Command_Processor.h
@@ -0,0 +1,98 @@
+/* -*- C++ -*- */
+// $Id$
+
+// ============================================================================
+//
+// = LIBRARY
+//    examples/Web_Crawler
+//
+// = FILENAME
+//    Command_Processor.h
+//
+// = AUTHOR
+//    Douglas C. Schmidt <schmidt@cs.wustl.edu>
+//
+// ============================================================================
+
+#ifndef _COMMAND_PROCESSOR_H
+#define _COMMAND_PROCESSOR_H
+
+#if !defined (ACE_LACKS_PRAGMA_ONCE)
+#pragma once
+#endif /* ACE_LACKS_PRAGMA_ONCE */
+
+#include "ace/Containers.h"
+#include "Options.h"
+
+// Forward decl.
+class URL;
+
+class Command
+{
+  // = TITLE
+  //     Abstract base class for a command.
+  //
+  // = DESCRIPTION
+  //     Each command is executed by a <Command_Processor>.
+public:
+  virtual ~Command (void);
+  // Virtual destructor.
+
+  virtual int execute (void) = 0;
+  // This is the entry point to execute the command.
+  virtual int destroy (void) = 0;
+  // Dispose of the command.  Subclasses decide how the object is
+  // released.
+};
+
+class URL_Command : public Command
+{
+  // = TITLE
+  //     Defines an API for executing a command on a URL.
+  //
+  // = DESCRIPTION
+  //     Each command is executed by a <Command_Processor>.
+public:
+  URL_Command (URL *);
+  // Constructor.  Stores the <URL> pointer; the URL is not copied.
+  
+  virtual int execute (void);
+  // Execute the URL command.  Returns 0 on success (including when
+  // the URL is skipped) and -1 on failure.
+
+  int destroy (void);
+  // Commit suicide: deletes this object and returns 0.
+private:
+  URL *url_;
+  // Pointer to the URL.
+};
+
+class Command_Processor
+{
+  // = TITLE
+  //     Execute commands that are passed to it.
+  //
+  // = DESCRIPTION
+  //     This class implements the Command Processor pattern.
+public:
+  Command_Processor (void);
+  // Constructor.
+
+  int insert (Command *);
+  // Insert a new <Command> into the <Command_Processor>'s queue.
+
+  int execute (void);
+  // Execute all the <Commands> in the queue.
+
+  int destroy (void);
+  // Destroy the <Command_Processor>.
+
+private:
+  friend class ACE_Shutup_GPlusPlus;
+  // Turn off g++ warning
+
+  ~Command_Processor (void);
+  // Ensure dynamic allocation.  Because the destructor is private,
+  // instances must be released through <destroy>.
+
+  // @@ You fill in here...
+  ACE_Unbounded_Queue<Command *> url_queue_;
+  // Queue of the commands waiting to be executed.
+};
+
+
+#endif /* _COMMAND_PROCESSOR_H */
diff --git a/ACE/examples/Web_Crawler/HTTP_URL.cpp b/ACE/examples/Web_Crawler/HTTP_URL.cpp
new file mode 100644
index 00000000000..44ceea324d4
--- /dev/null
+++ b/ACE/examples/Web_Crawler/HTTP_URL.cpp
@@ -0,0 +1,87 @@
+// $Id$
+
+#include "ace/OS_NS_stdio.h"
+#include "ace/OS_NS_string.h"
+#include "ace/Auto_Ptr.h"
+#include "URL_Visitor.h"
+#include "Options.h"
+#include "HTTP_URL.h"
+
+ACE_RCSID(Web_Crawler, HTTP_URL, "$Id$")
+
+// Accessor for the address of the URL this object represents.
+const ACE_URL_Addr &
+HTTP_URL::url_addr () const
+{
+  return url_addr_;
+}
+
+// Copy <url_addr> and record the page that contained this URL.  When
+// no containing page is supplied the URL is treated as its own
+// container.  The address is logged at debug level.
+HTTP_URL::HTTP_URL (const ACE_URL_Addr &url_addr,
+                    HTTP_URL *cp)
+  : url_addr_ (url_addr),
+    containing_page_ (cp != 0 ? cp : this)
+{
+  ACE_DEBUG ((LM_DEBUG, "HTTP_URL %s\n", url_addr.addr_to_string ()));
+}
+
+// Send an HTTP/1.1 GET request for this URL's path, followed by the
+// Host header, over the URL's stream.  Returns the byte count of the
+// second (Host header) send on success and -1 if the buffer cannot
+// be allocated or either send transfers nothing.
+ssize_t
+HTTP_URL::send_request (void)
+{
+  const size_t path_len =
+    ACE_OS::strlen (this->url_addr ().get_path_name ());
+  const size_t host_len =
+    ACE_OS::strlen (this->url_addr ().get_host_name ());
+
+  // Both strings, plus 20 bytes of slack, the NUL byte and 16 bytes
+  // of protocol filler.
+  const size_t commandsize = path_len + host_len + 20 + 1 + 16;
+
+  char *command = 0;
+  ACE_NEW_RETURN (command,
+                  char[commandsize],
+                  -1);
+
+  // Releases the <command> memory on every exit path.
+  ACE_Auto_Basic_Array_Ptr<char> cmd_ptr (command);
+  char *const request = cmd_ptr.get ();
+
+  ACE_OS::sprintf (request,
+                   "GET /%s HTTP/1.1\r\n",
+                   ACE_TEXT_ALWAYS_CHAR (this->url_addr ().get_path_name ()));
+
+  // Send the GET line to the connected server; give up if nothing
+  // was sent.
+  if (this->stream ().send_n (request,
+                              ACE_OS::strlen (request),
+                              const_cast<ACE_Time_Value *>
+                                (OPTIONS::instance ()->timeout ())) <= 0)
+    return -1;
+
+  // Reuse the same buffer for the Host header and the blank line
+  // that ends the request.
+  ACE_OS::sprintf (request,
+                   "Host: %s\r\n\r\n",
+                   this->url_addr ().get_host_name ());
+
+  // IMP: The length of the command has to be sent!
+  const ssize_t retval =
+    this->stream ().send_n (request,
+                            ACE_OS::strlen (request),
+                            const_cast<ACE_Time_Value *>
+                              (OPTIONS::instance ()->timeout ()));
+  this->stream ().svc_handler ()->idle (0);
+
+  return retval <= 0 ? -1 : retval;
+}
+
+// Visitor-pattern entry point: dispatch to the visitor's overload
+// for <HTTP_URL> and pass its result back to the caller.
+int
+HTTP_URL::accept (URL_Visitor *visitor)
+{
+  const int result = visitor->visit (*this);
+  return result;
+}
+
+// Commit suicide: deletes this object and returns 0.  The object
+// must not be used once this call has returned.
+int
+HTTP_URL::destroy ()
+{
+  delete this;
+  return 0;
+}
diff --git a/ACE/examples/Web_Crawler/HTTP_URL.h b/ACE/examples/Web_Crawler/HTTP_URL.h
new file mode 100644
index 00000000000..a926bb47938
--- /dev/null
+++ b/ACE/examples/Web_Crawler/HTTP_URL.h
@@ -0,0 +1,64 @@
+/* -*- C++ -*- */
+// $Id$
+
+// ============================================================================
+//
+// = LIBRARY
+//    examples/Web_Crawler
+//
+// = FILENAME
+//    HTTP_URL.h
+//
+// = AUTHOR
+//    Douglas C. Schmidt <schmidt@cs.wustl.edu>
+//
+// ============================================================================
+
+#ifndef _HTTP_URL_H
+#define _HTTP_URL_H
+
+#include "URL_Status.h"
+#include "URL.h"
+#include "Options.h"
+
+#if !defined (ACE_LACKS_PRAGMA_ONCE)
+#pragma once
+#endif /* ACE_LACKS_PRAGMA_ONCE */
+
+class HTTP_URL : public URL
+{
+  // = TITLE
+  //    An ADT for an HTTP URL.
+  // 
+  // = DESCRIPTION
+  //    This class plays the "element" role in the Visitor pattern.
+public:
+  HTTP_URL (const ACE_URL_Addr &url_addr,
+            HTTP_URL *containing_page = 0);
+  // The <url_addr> is the URL that we're going to be visiting.  We
+  // also keep track of the containing page, if any, which is used to
+  // print out more meaningful messages.  When <containing_page> is 0
+  // the URL is recorded as its own containing page.
+
+  virtual int accept (URL_Visitor *visitor);
+  // Accept the visitor, which will then perform a particular
+  // visitation strategy on the URL.  This method is part of the
+  // Visitor pattern.
+
+  virtual ssize_t send_request (void);
+  // Send a <GET> command to fetch the contents in the URI from the
+  // server.  Returns -1 on failure.
+  
+  virtual const ACE_URL_Addr &url_addr (void) const;
+  // Returns the URL that we represent.
+
+  int destroy (void);
+  // Commit suicide: deletes this object and returns 0.
+private:
+  ACE_URL_Addr url_addr_;
+  // Address of the URL we're connected to.
+
+  HTTP_URL *containing_page_;
+  // Page that contained us.
+};
+
+#endif /* _HTTP_URL_H */
diff --git a/ACE/examples/Web_Crawler/Iterators.cpp b/ACE/examples/Web_Crawler/Iterators.cpp
new file mode 100644
index 00000000000..98b4f999622
--- /dev/null
+++ b/ACE/examples/Web_Crawler/Iterators.cpp
@@ -0,0 +1,163 @@
+// $Id$
+
+#include "Options.h"
+#include "Iterators.h"
+
+ACE_RCSID(Web_Crawler, Iterators, "$Id$")
+
+// Nothing to release in the base class.
+URL_Iterator::~URL_Iterator ()
+{
+}
+
+// Commit suicide: deletes this iterator and returns 0.  The object
+// must not be used once this call has returned.
+int
+URL_Iterator::destroy ()
+{
+  delete this;
+  return 0;
+}
+
+// Bind the iterator to the <url> whose body it will scan.  Only a
+// reference is kept.
+HTML_Body_Iterator::HTML_Body_Iterator (URL &url) : url_ (url)
+{
+}
+
+// Scan the URL's stream chunk by chunk for the next HREF attribute
+// and pass the quoted link text back in <url>.  Returns the length
+// of the link that was found (non-zero for a non-empty link) or 0
+// when the stream is exhausted or a chunk holds an HREF without a
+// complete quoted value.
+int
+HTML_Body_Iterator::next (ACE_CString &url)
+{
+  size_t len = BUFSIZ;
+  const char *buf;
+  ACE_CString buffer;
+  int href_index = 0;
+
+  // <recv> returns a null pointer once the stream is exhausted.
+  for (buf = this->url_.stream ().recv (len);
+       buf != 0;
+       buf = this->url_.stream ().recv (len))
+    {
+
+      // Copy only the bytes <recv> reported in <len>.
+      buffer.set (buf, len, 1);
+
+      href_index = buffer.find ("HREF");
+
+      if (href_index < 0)
+        href_index = buffer.find ("href");
+
+      // Grep for " and grab the string until end-"
+      // NOTE(review): an HREF located at index 0 of the chunk is not
+      // treated as a match by this test -- confirm that is intended.
+      if ( href_index > 0)
+        {
+          // Get back to buffer start location.
+          this->url_.stream ().seek (-1 * static_cast<off_t> (len),
+                                     SEEK_CUR);
+
+          // NOTE(review): the index arithmetic below assumes that
+          // find (char, pos) returns an offset relative to <pos> --
+          // confirm against the ACE version in use.
+          int start_index = buffer.find ('\"',
+                                         href_index);
+          if (start_index <= 0)
+            break;
+
+          start_index += href_index;
+
+          int end_index = buffer.find ('\"',
+                                       start_index + 1);
+          if (end_index <= 0)
+            break;
+
+          end_index += start_index + 1;
+
+          ssize_t url_len = end_index - (start_index + 1);
+
+          ACE_CString temp = buffer.substring (start_index + 1,
+                                               url_len);
+          // Copy exactly the characters held by <temp>; using the
+          // chunk length here would read past the end of <temp>.
+          url.set (temp.c_str (), temp.length (), 1);
+
+          this->url_.stream ().seek (end_index + 1);
+
+          return url_len;
+        }
+    }
+  return 0;
+
+}
+
+// Bind the iterator to <url> and start with the end-of-header flag
+// cleared.
+HTTP_Header_Iterator::HTTP_Header_Iterator (URL &url)
+  : url_ (url), end_of_header_ (0)
+{
+}
+
+// Pass back the next header line in <line>.  Returns 1 when a line
+// was produced and 0 once the end of the header has already been
+// seen or the stream runs out before a line terminator is found.
+int
+HTTP_Header_Iterator::next (ACE_CString &line)
+{
+  // The blank line ending the header was consumed by an earlier call.
+  if (this->end_of_header_)
+    return 0;
+
+  char c;
+  while ((c = this->url_.stream ().get_char ()) != (char)EOF)
+    {
+      // A line terminated by "\r\n".
+      if (c == '\r' && this->url_.stream ().peek_char (0) == '\n')
+        {
+          // The token runs from the <recv> position up to, but not
+          // including, the '\r' that was just read.
+          line.set (this->url_.stream ().recv (),
+                    this->url_.stream ().recv_len () - 1,
+                    1);
+
+          const int blank_line_follows =
+            this->url_.stream ().peek_char (1) == '\r'
+            && this->url_.stream ().peek_char (2) == '\n';
+
+          if (blank_line_follows)
+            {
+              // End of the header section: step over "\n\r\n".
+              this->end_of_header_ = 1;
+              this->url_.stream ().seek (3);
+            }
+          else
+            {
+              // End of this line only: step over the '\n'.
+              this->url_.stream ().seek (1);
+            }
+
+          return 1;
+        }
+
+      // Handle broken Web servers that use '\n' instead of '\r\n'.
+      if (c == '\n')
+        {
+          line.set (this->url_.stream ().recv (),
+                    (this->url_.stream ().recv_len ()),
+                    1);
+
+          // A second '\n' right away marks the end of the header.
+          if (this->url_.stream ().peek_char (0) == '\n')
+            {
+              this->url_.stream ().seek (1);
+              this->end_of_header_ = 1;
+            }
+
+          return 1;
+        }
+    }
+
+  return 0;
+}
+
+// Bind the iterator to the <url> whose contents it will download.
+// Only a reference is kept.
+URL_Download_Iterator::URL_Download_Iterator (URL &url) : url_ (url)
+{
+}
+
+// Fetch the next chunk of up to <BUFSIZ> bytes from the URL's stream
+// into <buffer>.  Returns 1 when a chunk was copied and 0 when the
+// stream has no more data.
+int
+URL_Download_Iterator::next (ACE_CString &buffer)
+{
+  size_t len = BUFSIZ;
+  const char *chunk = this->url_.stream ().recv (len);
+
+  if (chunk != 0)
+    {
+      // <len> now holds the size <recv> reported for this chunk.
+      buffer.set (chunk, len, 1);
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/ACE/examples/Web_Crawler/Iterators.h b/ACE/examples/Web_Crawler/Iterators.h
new file mode 100644
index 00000000000..b5d267f7afb
--- /dev/null
+++ b/ACE/examples/Web_Crawler/Iterators.h
@@ -0,0 +1,117 @@
+/* -*- C++ -*- */
+// $Id$
+
+// ============================================================================
+//
+// = LIBRARY
+//    examples/Web_Crawler
+//
+// = FILENAME
+//    Iterators.h
+//
+// = AUTHOR
+//    Douglas C. Schmidt <schmidt@cs.wustl.edu>
+//
+// ============================================================================
+
+#ifndef _ITERATORS_H
+#define _ITERATORS_H
+
+#include "URL.h"
+
+#if !defined (ACE_LACKS_PRAGMA_ONCE)
+#pragma once
+#endif /* ACE_LACKS_PRAGMA_ONCE */
+
+class URL_Iterator
+{
+  // = TITLE
+  //    An abstract base class that defines an iterator.
+  // 
+  // = DESCRIPTION
+  //    Subclasses of this base class can define what strings
+  //    to return from <next>.  This class decouples higher-level
+  //    software from the details of whatever type of URL header or
+  //    body we're iterating over.
+public:
+  // = Initialization and termination methods.
+  virtual int destroy (void);
+  // "virtual" destructor.  Deletes the iterator and returns 0; this
+  // is the public way to dispose of an iterator, since the C++
+  // destructor is protected.
+
+  // = Iterator methods.
+  virtual int next (ACE_CString &string) = 0;
+  // Pass back the next <string> that hasn't been seen yet.  Returns 0
+  // when all items have been seen, else 1.
+
+protected:
+  virtual ~URL_Iterator (void);
+  // C++ destructor.
+};
+
+class HTML_Body_Iterator : public URL_Iterator
+{
+  // = TITLE
+  //    An iterator that returns URLs embedded in HTML files.
+public:
+  // = Initialization and termination methods.
+  HTML_Body_Iterator (URL &url);
+  // Constructor.
+
+  // = Iterator methods.
+  virtual int next (ACE_CString &url);
+  // Pass back the next <url> that hasn't been seen in the
+  // memory-mapped file.  Returns 0 when all items have been seen,
+  // else a non-zero value.
+  // NOTE(review): the implementation returns the length of the URL
+  // it found rather than the constant 1 -- confirm which one callers
+  // rely on.
+
+private:
+  URL &url_;
+  // HTTP URL that we're iterating over.
+};
+
+class HTTP_Header_Iterator : public URL_Iterator
+{
+  // = TITLE
+  //    An iterator that iterates over the HTTP header.
+public:
+  // = Initialization and termination methods.
+  HTTP_Header_Iterator (URL &url);
+  // Constructor.
+
+  // = Iterator methods.
+  virtual int next (ACE_CString &line);
+  // Pass back the next <line> that hasn't been seen in the
+  // memory-mapped file header.  Returns 0 when we've reached the end
+  // of the header, else 1.
+
+private:
+  URL &url_;
+  // HTTP URL that we're iterating over.
+
+  int end_of_header_;
+  // We've found the end of the header, which means this iterator is
+  // finished.
+};
+
+class URL_Download_Iterator : public URL_Iterator
+{
+  // = TITLE
+  //    An iterator that iterates over the contents of an entire URL,
+  //    i.e., both header and body, and returns it in <BUFSIZ>
+  //    <buffer>s. 
+public:
+  // = Initialization and termination methods.
+  URL_Download_Iterator (URL &url);
+  // Constructor.
+
+  // = Iterator methods.
+  virtual int next (ACE_CString &buffer);
+  // Pass back the next <buffer> of data from the stream, where
+  // <buffer.size> <= <BUFSIZ>.  Returns 0 when we've reached the end
+  // of the stream, else 1.
+
+private:
+  URL &url_;
+  // HTTP URL that we're iterating over.
+};
+
+#endif /* _ITERATORS_H */
diff --git a/ACE/examples/Web_Crawler/Makefile.am b/ACE/examples/Web_Crawler/Makefile.am
new file mode 100644
index 00000000000..00a30a4d2e1
--- /dev/null
+++ b/ACE/examples/Web_Crawler/Makefile.am
@@ -0,0 +1,60 @@
+##  Process this file with automake to create Makefile.in
+##
+## $Id$
+##
+## This file was generated by MPC.  Any changes made directly to
+## this file will be lost the next time it is generated.
+##
+## MPC Command:
+## /acebuilds/ACE_wrappers-repository/bin/mwc.pl -include /acebuilds/MPC/config -include /acebuilds/MPC/templates -feature_file /acebuilds/ACE_wrappers-repository/local.features -noreldefs -type automake -exclude build,Kokyu
+
+ACE_BUILDDIR = $(top_builddir)
+ACE_ROOT = $(top_srcdir)
+
+
+## Makefile.Web_Crawler.am
+
+if !BUILD_ACE_FOR_TAO
+noinst_PROGRAMS = main
+
+main_CPPFLAGS = \
+  -I$(ACE_ROOT) \
+  -I$(ACE_BUILDDIR)
+
+main_SOURCES = \
+  Command_Processor.cpp \
+  HTTP_URL.cpp \
+  Iterators.cpp \
+  Mem_Map_Stream.cpp \
+  Options.cpp \
+  URL.cpp \
+  URL_Addr.cpp \
+  URL_Status.cpp \
+  URL_Visitor.cpp \
+  URL_Visitor_Factory.cpp \
+  Web_Crawler.cpp \
+  main.cpp \
+  Command_Processor.h \
+  HTTP_URL.h \
+  Iterators.h \
+  Mem_Map_Stream.h \
+  Options.h \
+  URL.h \
+  URL_Addr.h \
+  URL_Status.h \
+  URL_Visitor.h \
+  URL_Visitor_Factory.h \
+  Web_Crawler.h
+
+main_LDADD = \
+  $(ACE_BUILDDIR)/ace/libACE.la
+
+endif !BUILD_ACE_FOR_TAO
+
+## Clean up template repositories, etc.
+clean-local:
+	-rm -f *~ *.bak *.rpo *.sym lib*.*_pure_* core core.*
+	-rm -f gcctemp.c gcctemp so_locations *.ics
+	-rm -rf cxx_repository ptrepository ti_files
+	-rm -rf templateregistry ir.out
+	-rm -rf ptrepository SunWS_cache Templates.DB
diff --git a/ACE/examples/Web_Crawler/Mem_Map_Stream.cpp b/ACE/examples/Web_Crawler/Mem_Map_Stream.cpp
new file mode 100644
index 00000000000..dda1d465a71
--- /dev/null
+++ b/ACE/examples/Web_Crawler/Mem_Map_Stream.cpp
@@ -0,0 +1,240 @@
+// $Id$
+
+#include "ace/FILE_Addr.h"
+#include "ace/Auto_Ptr.h"
+#include "Options.h"
+#include "Mem_Map_Stream.h"
+
+ACE_RCSID(Web_Crawler, Mem_Map_Stream, "$Id$")
+
+// Expose the socket stream owned by the service handler.
+ACE_SOCK_Stream &
+Mem_Map_Stream::stream ()
+{
+  return this->svc_handler_->peer ();
+}
+
+// Forward <size> bytes from <buf> straight to the peer, with flags
+// of 0 and the optional timeout <tv>.  The peer's result is returned
+// unchanged.
+ssize_t
+Mem_Map_Stream::send_n (const void *buf, size_t size, ACE_Time_Value *tv)
+{
+  const int flags = 0;
+  return this->svc_handler_->peer ().send_n (buf, size, flags, tv);
+}
+
+// Non-zero when the <get> position has reached (or passed) the end
+// of the data mapped so far.
+int
+Mem_Map_Stream::eof () const
+{
+  return this->end_of_mapping_plus1_ <= this->get_pos_;
+}
+
+// Return the character at the <get> position and advance past it.
+// When the mapped data is used up, more is pulled from the socket
+// first; EOF is returned if that fails.
+int
+Mem_Map_Stream::get_char (void)
+{
+  if (this->eof () && this->grow_file_and_remap () == -1)
+    return EOF;
+
+  // Widen through unsigned char, as getc() does, so that a data byte
+  // of 0xFF comes back as 255 and cannot be mistaken for EOF.
+  return static_cast<unsigned char> (*this->get_pos_++);
+}
+
+// Reset the <recv> and <get> positions to the start of the mapping
+// and recompute the end-of-mapping marker from the current mapping
+// size.  Always returns 0.
+int
+Mem_Map_Stream::rewind ()
+{
+  char *const base =
+    reinterpret_cast<char *> (this->mem_map_.addr ());
+
+  this->recv_pos_ = base;
+  this->get_pos_ = base;
+  this->end_of_mapping_plus1_ = base + this->mem_map_.size ();
+  return 0;
+}
+
+// Return the character <offset> positions past the <get> position
+// without moving it.  The backing store is grown as often as needed
+// to cover that position; EOF is returned if it cannot be grown far
+// enough.
+int
+Mem_Map_Stream::peek_char (size_t offset)
+{
+  // We may need to iterate if <offset> is large.
+  while (this->get_pos_ + offset >= this->end_of_mapping_plus1_)
+    if (this->grow_file_and_remap () == -1)
+      return EOF;
+
+  // Widen through unsigned char, as getc() does, so that a data byte
+  // of 0xFF comes back as 255 and cannot be mistaken for EOF.
+  return static_cast<unsigned char> (this->get_pos_[offset]);
+}
+
+// Pointer to the start of the current token, i.e. the <recv>
+// position.  Nothing is advanced.
+const char *
+Mem_Map_Stream::recv () const
+{
+  return recv_pos_;
+}
+
+// Return a pointer to the data at the <recv> position and advance
+// the stream by up to <len> bytes via <seek>.  On return <len> holds
+// the distance between the new <get> position and the returned
+// pointer.  Returns 0 when the mapped data is used up and no more
+// can be read.
+const char *
+Mem_Map_Stream::recv (size_t &len)
+{
+  const int exhausted =
+    this->eof () && this->grow_file_and_remap () == -1;
+  if (exhausted)
+    return 0;
+
+  const char *const token_start = this->recv_pos_;
+  this->seek (static_cast <off_t> (len), SEEK_CUR);
+  len = this->get_pos_ - token_start;
+  return token_start;
+}
+
+// Length in bytes of the current token: the distance from the <recv>
+// position to the <get> position.
+size_t
+Mem_Map_Stream::recv_len () const
+{
+  return get_pos_ - recv_pos_;
+}
+
+// Return a pointer to the data <offset> bytes past the <get>
+// position once at least <size> bytes are mapped there.  The <get>
+// position is not moved.  Returns 0 if the backing store cannot be
+// grown far enough.
+const char *
+Mem_Map_Stream::peek_str (size_t offset, 
+                          size_t size)
+{
+  const size_t needed = offset + size;
+
+  // Keep pulling data in until the requested range is covered.
+  while (this->get_pos_ + needed > this->end_of_mapping_plus1_)
+    {
+      if (this->grow_file_and_remap () == -1)
+        return 0;
+    }
+
+  return this->get_pos_ + offset;
+}
+
+// Move the <get> position by <offset> relative to <whence>, grow the
+// backing store until it covers the new position, then move the
+// <recv> position to match.
+//   o SEEK_SET: <offset> bytes from the start of the mapping.
+//   o SEEK_CUR: <offset> bytes from the current <get> position.
+//   o SEEK_END: not supported; returns -1 via ACE_NOTSUP_RETURN and
+//     leaves both positions untouched.
+// Any other <whence> leaves the <get> position where it is.  Returns
+// the new position as an offset from the start of the mapping, or -1
+// if the backing store cannot be grown far enough.
+off_t
+Mem_Map_Stream::seek (off_t offset, int whence)
+{
+  switch (whence)
+    {
+    case SEEK_SET:
+      this->get_pos_ =
+        reinterpret_cast<char *> (this->mem_map_.addr ())
+        + offset;
+      break;
+
+    case SEEK_CUR:
+      this->get_pos_ += offset;
+      break;
+
+    case SEEK_END:
+      // @@ Not sure how to implement this (yet).  Report the failure
+      // without moving <get_pos_>, so that an unsupported request
+      // has no side effect on the stream.
+      ACE_NOTSUP_RETURN (-1);
+    }
+
+  // Make sure that the backing store will cover this.
+  // NOTE(review): when growing fails, <get_pos_> is left beyond the
+  // end of the mapping -- confirm callers cope with that.
+  while (this->get_pos_ > this->end_of_mapping_plus1_)
+    if (this->grow_file_and_remap () == -1)
+      return (off_t) -1;
+
+  this->recv_pos_ = this->get_pos_;
+  return this->recv_pos_ - reinterpret_cast<char *> (this->mem_map_.addr ());
+}
+
+// Accessor for the service handler that holds the peer connection.
+Mem_Map_Stream::Svc_Handler * 
+Mem_Map_Stream::svc_handler ()
+{
+  return svc_handler_;
+}
+
+
+// Connect to the server at <addr> through <connector>, create a
+// temporary file to back the memory mapping, and reset all the
+// stream positions.  Returns 0 on success and -1 if the connection
+// or the temporary file cannot be set up.
+int
+Mem_Map_Stream::open (STRAT_CONNECTOR *connector,
+                      const ACE_INET_Addr &addr)
+{
+  // Start from a null handler; <connect> fills the pointer in.
+  svc_handler_ = 0; 
+
+  // Connect to the server at <addr>. If the handler has to be 
+  // connected to the server again, the Caching strategy takes care
+  // and uses the same connection.
+  if (connector->connect (svc_handler_,
+                          addr) == -1)
+    {
+
+      ACE_ERROR_RETURN ((LM_ERROR,
+                         "%p %s %d\n",
+                         "Connect failed",
+                         addr.get_host_name (),
+                         addr.get_port_number ()),
+                        -1);
+    }
+  // Create a temporary filename.
+  ACE_FILE_Addr file (ACE_sap_any_cast (ACE_FILE_Addr &));
+
+  // Create the temporary file via the <ACE_Mem_Map> class API.
+  if (this->mem_map_.open (file.get_path_name (),
+                          O_RDWR | O_CREAT | O_APPEND,
+                          ACE_DEFAULT_FILE_PERMS) == -1)
+    ACE_ERROR_RETURN ((LM_ERROR,
+                       "%p\n",
+                       "open"),
+                      -1);
+  // Make sure to unlink this right away so that if this process
+  // crashes these files will be removed automatically.
+  // The unlink step is compiled out by the "#if 0" below, so the
+  // call to <rewind> that follows it always runs.
+#if 0
+  else if (ACE_OS::unlink (file.get_path_name ()) == -1)
+  ACE_ERROR_RETURN ((LM_ERROR,
+                       "%p\n",
+                       "unlink"),
+                      -1);
+  else
+#endif
+    // Initialize all the position pointers to 0.
+    this->rewind ();
+ 
+  return 0;
+}
+
+// Read the next chunk from the socket, append it to the temporary
+// file and extend the mapping over the whole file.  Returns 0 when
+// at least one byte was added and -1 on EOF, timeout or any other
+// failure that added no data.
+int
+Mem_Map_Stream::grow_file_and_remap (void)
+{
+  char buf[BUFSIZ + 1];
+
+  // Copy the next chunk of bytes from the socket into the temporary
+  // file.
+  ACE_Time_Value tv (*OPTIONS::instance ()->timeout ());
+
+  // <recv_n> reports EOF or a timeout through its return value even
+  // when part of the buffer was filled, so use the transferred byte
+  // count to decide whether any data arrived.  Otherwise the final,
+  // short chunk of a response would be thrown away.
+  size_t bytes_read = 0;
+  ssize_t n = this->svc_handler_->peer ().recv_n (buf,
+                                                  sizeof buf,
+                                                  0,
+                                                  &tv,
+                                                  &bytes_read);
+  if (bytes_read == 0)
+    {
+      // Nothing arrived: either an error or timeout (n == -1) or a
+      // clean EOF (n == 0).
+      if (n == -1 && OPTIONS::instance ()->debug ())
+        ACE_ERROR ((LM_ERROR,
+                    "%p\n",
+                    "recv"));
+      return -1;
+    }
+
+  if (ACE::write_n (this->mem_map_.handle (), buf, bytes_read)
+      != static_cast<ssize_t> (bytes_read))
+    ACE_ERROR_RETURN ((LM_ERROR,
+                       "%p\n",
+                       "write_n"),
+                      -1);
+
+  // Grow the memory-mapping to encompass the entire temporary file.
+  if (this->mem_map_.map (-1,
+                          PROT_RDWR,
+                          ACE_MAP_PRIVATE | ACE_MAP_FIXED,
+                          ACE_DEFAULT_BASE_ADDR) == -1)
+    ACE_ERROR_RETURN ((LM_ERROR,
+                       "%p\n",
+                       "map"),
+                      -1);
+  // MAP_FAILED is used as a "first time in" flag.
+  if (this->recv_pos_ == MAP_FAILED)
+    {
+      this->recv_pos_ = reinterpret_cast<char *> (this->mem_map_.addr ());
+      this->get_pos_ = this->recv_pos_;
+    }
+
+  this->end_of_mapping_plus1_ =
+    reinterpret_cast<char *> (this->mem_map_.addr ())
+    + this->mem_map_.size ();
+
+  return 0;
+}
+
+// Tear down the mapping and delete the temporary backing file.
+Mem_Map_Stream::~Mem_Map_Stream ()
+{
+  mem_map_.remove ();
+}
+
diff --git a/ACE/examples/Web_Crawler/Mem_Map_Stream.h b/ACE/examples/Web_Crawler/Mem_Map_Stream.h
new file mode 100644
index 00000000000..3595f04ab77
--- /dev/null
+++ b/ACE/examples/Web_Crawler/Mem_Map_Stream.h
@@ -0,0 +1,190 @@
+/* -*- C++ -*- */
+// $Id$
+
+// ============================================================================
+//
+// = LIBRARY
+//     examples/Web_Crawler
+//
+// = FILENAME
+//    Mem_Map_Stream.h
+//
+// = AUTHOR
+//    Douglas C. Schmidt <schmidt@cs.wustl.edu>
+//
+// ============================================================================
+
+#ifndef _MEM_MAP_STREAM_H
+#define _MEM_MAP_STREAM_H
+#include /**/ "ace/pre.h"
+
+#include "ace/SOCK_Stream.h"
+
+#if !defined (ACE_LACKS_PRAGMA_ONCE)
+#pragma once
+#endif /* ACE_LACKS_PRAGMA_ONCE */
+
+
+#include "ace/Mem_Map.h"
+#include "ace/SOCK_Connector.h"
+#include "ace/Connector.h"
+#include "ace/Svc_Handler.h"
+#include "ace/Strategies_T.h"
+
+class Mem_Map_Stream
+{
+  // = TITLE
+  //   Provides a memory-mapped stream abstraction to simplify parsing
+  //   of tokens.
+  //
+  // = DESCRIPTION
+  //   This class makes it possible to treat a connection as a stream
+  //   of bytes, similar to the C library stdio streams.  The contents
+  //   of the connection are buffered incrementally in a memory-mapped
+  //   file.  This class maintains pointers to two positions in the
+  //   stream:
+  //
+  //   1. The <recv> position, which keeps track of the beginning of a
+  //      token that is in the stream.
+  //
+  //   2. The <get> position, which moves along character-by-character
+  //      until the end of the token is reached.
+  //
+  //   Once a token has been located, it can be extracted from the
+  //   stream by calling the <recv>.  The length of the token, i.e.,
+  //   the <recv_len>, is the length in bytes between the <get>
+  //   position and the <recv> position.  Once the token has been
+  //   extracted, the <recv> and <get> positions can be updated by the
+  //   <seek> method.
+  //
+  //   NOTE(review): no constructor is declared, so the pointer
+  //   members are only given values by <open>; call <open> before
+  //   using any other method.
+
+public:
+  typedef ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> Svc_Handler;
+  // Handler type that holds the connection to the peer.
+
+  typedef ACE_Strategy_Connector<Svc_Handler,
+                               ACE_SOCK_CONNECTOR>
+        STRAT_CONNECTOR;
+  // Connector type that <open> uses to establish the connection.
+
+  // Mem_Map_Stream (void);
+  // constructor added:KIRTHIKA
+  virtual int open (STRAT_CONNECTOR *connector,
+                    const ACE_INET_Addr &);
+  // Initialize this object: connect to the given address and create
+  // the temporary backing file.  Returns 0 on success, -1 on failure.
+
+  virtual ~Mem_Map_Stream (void);
+  // Destructor.
+
+  // = Accessor.
+  ACE_SOCK_Stream &stream (void);
+  // Returns the underlying <ACE_SOCK_Stream>.
+
+  // = I/O methods.
+
+  virtual ssize_t send_n (const void *buf,
+                          size_t size,
+                          ACE_Time_Value *tv = 0);
+  // Send <size> bytes in <buf> to the connected peer.  This is a
+  // completely unbuffered call.
+
+  virtual int get_char (void);
+  // Return the next character in the stream and advance the <get>
+  // position.  Returns EOF when the <get> position reaches the end of
+  // the HTTP stream.
+
+  virtual const char *recv (size_t &len);
+  // Returns a pointer to array of at most <len> characters starting
+  // at the <recv> position.  If the <recv> position + <len> extends
+  // past the EOF then <len> is set to the number of characters
+  // between the <recv> position and the EOF and both the <get> and
+  // <recv> positions are advanced by <len>.  Returns 0 if the <recv>
+  // position is at the EOF.
+
+  virtual const char *recv (void) const;
+  // Returns a pointer to array of characters starting at the <recv>
+  // position.
+
+  virtual size_t recv_len (void) const;
+  // Returns the length in bytes between the <get> position and the
+  // <recv> position.
+
+  virtual int rewind (void);
+  // Resets the <get> and <recv> positions to the beginning of the
+  // stream.  This works since all the data has been cached in the
+  // memory-mapped backing store.
+
+  virtual int peek_char (size_t offset);
+  // Returns the nth character <offset> from the <get> position in the
+  // stream without advancing the <get> position.  Automatically
+  // extends the backing store if necessary.  Returns EOF if <offset>
+  // is past the end of the stream.
+
+  virtual const char *peek_str (size_t offset, size_t size);
+  // Return a pointer to an array of <size> characters starting at
+  // <offset> characters from the <get> position in the stream without
+  // advancing the <get> position.  Automatically extends the backing
+  // store if necessary.  Returns 0 if <offset> or <offset + size> is
+  // past the end of the stream.
+
+  virtual off_t seek (off_t offset, int whence = SEEK_CUR);
+  // Sets the <get> and <recv> positions as follows:
+  //    o If <whence> is <SEEK_SET>, the positions are set to <offset>
+  //      bytes from the start of the stream.
+  //
+  //    o  If <whence> is <SEEK_CUR>, the positions are set to the
+  //       current <get> position plus <offset>.
+  //
+  //    o  If <whence> is <SEEK_END>, the positions are set to the size
+  //       of the stream plus <offset>.
+  //       NOTE(review): the implementation reports <SEEK_END> as not
+  //       supported and returns -1.
+
+  virtual int eof (void) const;
+  // Returns 1 if we're at the end of the HTTP stream, else 0.
+
+
+  /*
+  typedef ACE_NOOP_Creation_Strategy<Svc_Handler>
+  NULL_CREATION_STRATEGY;
+  typedef ACE_NOOP_Concurrency_Strategy<Svc_Handler>
+  NULL_ACTIVATION_STRATEGY;
+  typedef ACE_Cached_Connect_Strategy<Svc_Handler,
+                                    ACE_SOCK_CONNECTOR,
+                                    ACE_SYNCH_NULL_MUTEX>
+                                    CACHED_CONNECT_STRATEGY;*/
+
+  Svc_Handler *svc_handler (void);
+  // Returns the service handler that holds the peer connection.
+
+private:
+  int grow_file_and_remap (void);
+  // Grow the file by reading another chunk from the HTTP socket and
+  // extend the mapping to cover this chunk.  Returns -1 on failure or
+  // EOF, else 0.
+
+  //ACE_SOCK_Stream stream_;
+
+  Svc_Handler *svc_handler_;
+  // Connection to peer. The granularity is at the Svc_Handler level.
+  // The Svc_Handler has an SOCK_Stream.
+  /*
+  NULL_CREATION_STRATEGY creation_strategy_;
+  NULL_ACTIVATION_STRATEGY activation_strategy_;
+  // Configure the Strategy Connector with a strategy that caches
+  // connection.
+  CACHED_CONNECT_STRATEGY caching_connect_strategy_;
+
+  STRAT_CONNECTOR *strat_connector_;  */
+
+  ACE_Mem_Map mem_map_;
+  // Memory-mapped file that we're iterating over.
+
+  char *recv_pos_;
+  // Pointer to the address where the next <recv> method will start.
+
+  char *get_pos_;
+  // Pointer to the address where the next <get_char> method will
+  // start.
+
+  char *end_of_mapping_plus1_;
+  // Address one past the end of the file mapping.
+
+};
+
+#include /**/ "ace/post.h"
+#endif /* _MEM_MAP_STREAM_H */
diff --git a/ACE/examples/Web_Crawler/Options.cpp b/ACE/examples/Web_Crawler/Options.cpp
new file mode 100644
index 00000000000..389cbfa0733
--- /dev/null
+++ b/ACE/examples/Web_Crawler/Options.cpp
@@ -0,0 +1,177 @@
+// $Id$
+
+#include "ace/Get_Opt.h"
+#include "ace/Log_Msg.h"
+#include "URL_Addr.h"
+#include "Options.h"
+#include "ace/OS_NS_string.h"
+
+ACE_RCSID(Web_Crawler, Options, "$Id$")
+
+// Parse the command line in <argv> and initialize every option.  All
+// options are first reset to their defaults and then overridden by
+// whatever appears on the command line.  String options point
+// directly into <argv> (nothing is copied), and the -u handler splits
+// its argument in place at the first '/'.  Always returns 0; an
+// unrecognized option prints the usage message (see the NOTREACHED
+// note below).
+int
+Options::parse_args (int argc, ACE_TCHAR *argv[])
+{
+  ACE_Get_Opt getopt (argc, argv, ACE_TEXT ("df:h:i:l:rt:u:vo:p:"));
+
+  ACE_LOG_MSG->open (argv[0]);
+
+  // Defaults.
+  this->hostname_ = ACE_TEXT ("www.cs.wustl.edu");
+  this->uri_ = ACE_TEXT ("index.html");
+  this->recurse_ = 0;
+  this->debug_ = 0;
+  this->timeout_.sec (ACE_DEFAULT_TIMEOUT);
+  this->url_filter_ = 0;
+  this->verbose_ = 0;
+  this->order_ = ACE_TEXT ("FIFO");
+  this->port_no_ = ACE_DEFAULT_HTTP_PORT;
+
+  // The default is to make this limit as large as possible.
+  this->handle_limit_ = -1;
+
+  for (int c;
+       (c = getopt ()) != EOF;
+       )
+    switch (c)
+      {
+      case ACE_TEXT ('d'):
+        this->debug_ = 1;
+        break;
+      case ACE_TEXT ('f'):
+        this->url_filter_ = getopt.opt_arg ();
+        break;
+      case ACE_TEXT ('h'):
+        this->hostname_ = getopt.opt_arg ();
+        break;
+      case ACE_TEXT ('i'):
+        this->uri_ = getopt.opt_arg ();
+        break;
+      case ACE_TEXT ('l'):
+        this->handle_limit_ = ACE_OS::atoi (getopt.opt_arg ());
+        break;
+      case ACE_TEXT ('r'):
+        this->recurse_ = 1;
+        break;
+      case ACE_TEXT ('t'):
+        this->timeout_.sec (ACE_OS::atoi (getopt.opt_arg ()));
+        break;
+      case ACE_TEXT ('u'):
+        {
+          // "host/path": the host name is everything before the first
+          // '/', the URI everything after it.  The argument is
+          // modified in place so that <hostname_> ends at the '/'.
+          this->hostname_ = getopt.opt_arg ();
+          ACE_TCHAR *s = ACE_OS::strchr (getopt.opt_arg (), ACE_TEXT ('/'));
+          if (s != 0)
+            {
+              this->uri_ = s + 1;
+              *s = ACE_TEXT ('\0');
+            }
+          else
+            ACE_ERROR ((LM_ERROR,
+                        ACE_TEXT ("invalid URL %s\n"),
+                        getopt.opt_arg ()));
+        }
+        break;
+      case ACE_TEXT ('v'):
+        this->verbose_ = 1;
+        break;
+      case ACE_TEXT ('o'):
+        this->order_ = getopt.opt_arg ();
+        break;
+      case ACE_TEXT ('p'):
+        this->port_no_ = ACE_OS::atoi (getopt.opt_arg ());
+        break;
+      default:
+        // List every option accepted by the option string above.
+        ACE_ERROR ((LM_ERROR,
+                    ACE_TEXT ("usage: %n [-d] [-f filter] [-h hostname]")
+                    ACE_TEXT (" [-i URI] [-l handle-limit] [-o FIFO|LIFO]")
+                    ACE_TEXT (" [-p port] [-r] [-t timeout] [-u URL]")
+                    ACE_TEXT (" [-v]\n%a"),
+                    1));
+
+        /* NOTREACHED */
+      }
+
+  return 0;
+}
+
+// Port number to use when contacting the Web server.
+int
+Options::port_no (void) const
+{
+  return port_no_;
+}
+
+// Non-zero when the -r (recurse) flag was given.
+int
+Options::recurse (void) const
+{
+  return recurse_;
+}
+
+// Pointer to the timeout value owned by this object.
+const ACE_Time_Value *
+Options::timeout (void) const
+{
+  const ACE_Time_Value *tv = &timeout_;
+  return tv;
+}
+
+// Non-zero when the -d (debug) flag was given.
+int
+Options::debug (void) const
+{
+  return debug_;
+}
+
+// Non-zero when the -v (verbose) flag was given.
+int
+Options::verbose (void) const
+{
+  return verbose_;
+}
+
+// Traversal order string ("FIFO" unless overridden with -o).
+const ACE_TCHAR *
+Options::order (void) const
+{
+  return order_;
+}
+// Host name of the initial Web server.
+const ACE_TCHAR *
+Options::hostname (void) const
+{
+  return hostname_;
+}
+
+// Initial URI (path on the server) to fetch.
+const ACE_TCHAR *
+Options::path_name (void) const
+{
+  return uri_;
+}
+
+// URL filter string given with -f, or 0 if none was given.
+const ACE_TCHAR *
+Options::url_filter (void) const
+{
+  return url_filter_;
+}
+
+// Currently registered <Command_Processor>.
+Command_Processor *
+Options::command_processor (void) const
+{
+  return command_processor_;
+}
+
+// Register <cp> as the <Command_Processor>; only the pointer is stored.
+void
+Options::command_processor (Command_Processor *cp)
+{
+  command_processor_ = cp;
+}
+
+// Currently registered <URL_Visitor>.
+URL_Visitor *
+Options::visitor (void) const
+{
+  return visitor_;
+}
+
+// Register <v> as the <URL_Visitor>; only the pointer is stored.
+void
+Options::visitor (URL_Visitor *v)
+{
+  visitor_ = v;
+}
+
+// Handle limit given with -l (-1 when the option was not given).
+int
+Options::handle_limit (void)
+{
+  return handle_limit_;
+}
diff --git a/ACE/examples/Web_Crawler/Options.h b/ACE/examples/Web_Crawler/Options.h
new file mode 100644
index 00000000000..ef5f2efd40c
--- /dev/null
+++ b/ACE/examples/Web_Crawler/Options.h
@@ -0,0 +1,124 @@
+/* -*- C++ -*- */
+// $Id$
+
+// ============================================================================
+//
+// = LIBRARY
+//    examples/Web_Crawler
+//
+// = FILENAME
+//    Options.h
+//
+// = AUTHOR
+//    Douglas C. Schmidt <schmidt@cs.wustl.edu>
+//
+// ============================================================================
+
+#ifndef _OPTIONS_H
+#define _OPTIONS_H
+
+#include "ace/Null_Mutex.h"
+#include "ace/Singleton.h"
+#include "ace/Time_Value.h"
+
+#if !defined (ACE_LACKS_PRAGMA_ONCE)
+#pragma once
+#endif /* ACE_LACKS_PRAGMA_ONCE */
+
+// Forward decls.
+class Command_Processor;
+class URL_Visitor;
+
+class Options
+{
+  // = TITLE
+  //   Maintains the global options.
+  //
+  // = DESCRIPTION
+  //   This class is converted into a Singleton by the
+  //   <ACE_Singleton> template.
+public:
+  Options (void)
+    : recurse_ (0),
+      hostname_ (0),
+      uri_ (0),
+      debug_ (0),
+      verbose_ (0),
+      order_ (0),
+      url_filter_ (0),
+      command_processor_ (0),
+      visitor_ (0),
+      port_no_ (0),
+      handle_limit_ (-1)
+  {
+  }
+  // Default constructor.  Puts every member into a well-defined
+  // "unset" state so that the accessors never return indeterminate
+  // values, even if they are called before <parse_args> or before the
+  // <Command_Processor>/<URL_Visitor> have been registered.
+
+  int parse_args (int argc, ACE_TCHAR *argv[]);
+  // Parse the command-line arguments and initialize the options.
+
+  int recurse (void) const;
+  // If non-0 and the link is an HTML file then recursively check all
+  // links that are embedded in the body of file.
+
+  const ACE_TCHAR *hostname (void) const;
+  // Return the hostname of the initial Web server.
+
+  const ACE_TCHAR *path_name (void) const;
+  // Return the initial URI.
+
+  const ACE_TCHAR *url_filter (void) const;
+  // String used to filter out which URLs to validate.
+
+  int debug (void) const;
+  // Are we debugging?
+
+  int verbose (void) const;
+  // Are we being verbose?
+
+  const ACE_TCHAR *order (void) const;
+  // Traversal order of the URLs: "LIFO" or "FIFO".
+
+  int port_no (void) const;
+  // Port number of the Web server.
+
+  const ACE_Time_Value *timeout (void) const;
+  // Return the timeout used to prevent hanging on <recv> and
+  // <connect> calls to broken servers.
+
+  // = Get/set the <Command_Processor>.
+  Command_Processor *command_processor (void) const;
+  void command_processor (Command_Processor *);
+
+  // = Get/set the <URL_Visitor>.
+  URL_Visitor *visitor (void) const;
+  void visitor (URL_Visitor *);
+
+  int handle_limit (void);
+  // Get the handle limit.
+
+private:
+  int recurse_;
+  // Are we recursing?
+
+  const ACE_TCHAR *hostname_;
+  // Initial Web server name.
+
+  const ACE_TCHAR *uri_;
+  // Initial URI name.
+
+  int debug_;
+  // Are we debugging?
+
+  int verbose_;
+  // Are we being verbose?
+
+  const ACE_TCHAR *order_;
+  // Whether the URLs are traversed in FIFO or LIFO order.
+
+  ACE_Time_Value timeout_;
+  // Timeout on <recv> and <connect> to broken Web servers.
+
+  const ACE_TCHAR *url_filter_;
+  // String used to filter out which URLs to validate.
+
+  Command_Processor *command_processor_;
+  // Pointer to the Command_Processor.
+
+  URL_Visitor *visitor_;
+  // Pointer to the <URL_Visitor>.
+
+  int port_no_;
+  // Port number.
+
+  int handle_limit_;
+  // The limit of the number of descriptors to be given for this process.
+};
+
+// Typedef an Options Singleton. 
+typedef ACE_Singleton <Options, ACE_Null_Mutex> OPTIONS;
+
+#endif /* _OPTIONS_H */
diff --git a/ACE/examples/Web_Crawler/README b/ACE/examples/Web_Crawler/README
new file mode 100644
index 00000000000..4f81809173d
--- /dev/null
+++ b/ACE/examples/Web_Crawler/README
@@ -0,0 +1,25 @@
+Web Crawler                                     Kirthika Parameswaran
+-----------
+
+The Web Crawler follows the HTTP/1.1 protocol.
+ 
+This crawler visits URLs in either FIFO or LIFO order; the URLs are
+stored in an ACE_Unbounded_Queue. The Command Processor pattern is
+used in this example.
+
+This example also includes the auto-purging feature: connections are
+removed from the cache when the process runs out of file descriptors.
+
+[Use the -l option to set the handle limit].
+
+Run:
+---
+
+
+> make 
+
+> main -r -u www.cs.wustl.edu/~kirthika/test.html -o LIFO
+
+or
+
+> main -r -u www.cs.wustl.edu/~kirthika/test.html -o FIFO
diff --git a/ACE/examples/Web_Crawler/URL.cpp b/ACE/examples/Web_Crawler/URL.cpp
new file mode 100644
index 00000000000..ce52ed892ad
--- /dev/null
+++ b/ACE/examples/Web_Crawler/URL.cpp
@@ -0,0 +1,39 @@
+// $Id$
+
+#include "URL.h"
+
+ACE_RCSID(Web_Crawler, URL, "$Id$")
+
+// Access the <Mem_Map_Stream> owned by this URL.
+Mem_Map_Stream &
+URL::stream (void)
+{
+  return stream_;
+}
+
+// Out-of-line definition of the virtual destructor.
+URL::~URL (void)
+{
+  // Nothing to do here.
+}
+
+// Reply status recorded for this URL.
+const URL_Status &
+URL::reply_status (void)
+{
+  return reply_status_;
+}
+
+// Record <rs> as the reply status of this URL.
+void
+URL::reply_status (const URL_Status &rs)
+{
+  reply_status_ = rs;
+}
+
+// Content type recorded for this URL.
+const ACE_CString &
+URL::content_type (void)
+{
+  return content_type_;
+}
+
+// Record <ct> as the content type of this URL.
+void
+URL::content_type (const ACE_CString &ct)
+{
+  content_type_ = ct;
+}
diff --git a/ACE/examples/Web_Crawler/URL.h b/ACE/examples/Web_Crawler/URL.h
new file mode 100644
index 00000000000..68c41f018ad
--- /dev/null
+++ b/ACE/examples/Web_Crawler/URL.h
@@ -0,0 +1,82 @@
+/* -*- C++ -*- */
+// $Id$
+
+// ============================================================================
+//
+// = LIBRARY
+//    examples/Web_Crawler
+//
+// = FILENAME
+//    URL.h
+//
+// = AUTHOR
+//    Douglas C. Schmidt <schmidt@cs.wustl.edu>
+//
+// ============================================================================
+
+#ifndef _URL_H
+#define _URL_H
+
+#include "Mem_Map_Stream.h"
+
+#if !defined (ACE_LACKS_PRAGMA_ONCE)
+#pragma once
+#endif /* ACE_LACKS_PRAGMA_ONCE */
+
+#include "URL_Addr.h"
+#include "URL_Status.h"
+
+#include "ace/SString.h"
+
+
+// Forward declaration.
+class URL_Visitor;
+
+class URL
+{
+  // = TITLE
+  //    Base class for a URL.
+  //
+  // = DESCRIPTION
+  //    This class plays a role in the Visitor pattern.  Besides the
+  //    pure virtual operations it stores the reply status, the
+  //    content type and the <Mem_Map_Stream> associated with the URL.
+public:
+  virtual ~URL (void);
+  // Destructor.
+
+  virtual int accept (URL_Visitor *visitor) = 0;
+  // Accept the visitor, which will then perform a particular
+  // visitation strategy on the URL.  This method is part of the
+  // Visitor pattern.
+
+  virtual ssize_t send_request (void) = 0;
+  // Send a <GET> command to fetch the contents in the URI from the
+  // server.
+
+  virtual const ACE_URL_Addr &url_addr (void) const = 0;
+  // Returns the URL that we represent.
+
+  virtual Mem_Map_Stream &stream (void);
+  // Returns the <Mem_Map_Stream>.
+
+  // = Get/set the reply status.
+  virtual const URL_Status &reply_status (void);
+  virtual void reply_status (const URL_Status &);
+
+  // = Get/set the content type.
+  virtual const ACE_CString &content_type (void);
+  virtual void content_type (const ACE_CString &);
+
+
+
+private:
+  URL_Status reply_status_;
+  // Reply status of the URL.
+
+  ACE_CString content_type_;
+  // Content-type of the URL.
+
+  Mem_Map_Stream stream_;
+  // Contents of the stream.
+};
+
+#endif /* _URL_H */
diff --git a/ACE/examples/Web_Crawler/URL_Addr.cpp b/ACE/examples/Web_Crawler/URL_Addr.cpp
new file mode 100644
index 00000000000..5a630e387fb
--- /dev/null
+++ b/ACE/examples/Web_Crawler/URL_Addr.cpp
@@ -0,0 +1,234 @@
+// $Id$
+
+#include "URL_Addr.h"
+#include "ace/Log_Msg.h"
+#include "ace/OS_NS_string.h"
+#include "ace/OS_NS_stdio.h"
+#include "ace/OS_NS_stdlib.h"
+#include "ace/OS_Memory.h"
+
+ACE_RCSID (Web_Crawler,
+           URL_Addr,
+           "$Id$")
+
+
+// Default constructor: no path name and no cached address string.
+ACE_URL_Addr::ACE_URL_Addr (void)
+  : path_name_ (0), addr_string_ (0), addr_string_len_ (0)
+{
+}
+
+// Format this address as "host:port/path" into the caller-supplied
+// buffer <s> of <size> characters.  The host part is the host name if
+// <ipaddr_format> is 0, otherwise the dotted IP address.  Returns -1
+// if <s> is null, the host cannot be obtained, or <size> is too small
+// for the worst-case length; returns 0 on success.  A missing path
+// name is formatted as an empty string.
+int
+ACE_URL_Addr::addr_to_string (ACE_TCHAR *s,
+                              size_t size,
+                              int ipaddr_format) const
+{
+  // Look the host up once; it is needed both for sizing and printing.
+  const char *host = ipaddr_format == 0
+    ? this->get_host_name ()
+    : this->get_host_addr ();
+
+  // Never hand a null pointer to strlen()/sprintf().
+  const ACE_TCHAR *path =
+    this->path_name_ != 0 ? this->path_name_ : ACE_TEXT ("");
+
+  if (s == 0 || host == 0)
+    return -1;
+
+  const size_t total_len =
+    ACE_OS::strlen (host)
+    + ACE_OS::strlen ("65536") // Assume the max port number.
+    + ACE_OS::strlen (path)
+    + sizeof (':')
+    + sizeof ('/')
+    + sizeof ('\0'); // For trailing '\0'.
+
+  if (size < total_len)
+    return -1;
+
+  ACE_OS::sprintf (s, ACE_TEXT ("%s:%d/%s"),
+                   ACE_TEXT_CHAR_TO_TCHAR (host),
+                   this->get_port_number (),
+                   path);
+  return 0;
+}
+
+// Format this address as "host:port/path" into an internal buffer
+// that is grown on demand and released by the destructor.  The host
+// part is the host name if <ipaddr_format> is 0, otherwise the dotted
+// IP address.  Returns the internal buffer, or 0 if the host cannot
+// be obtained or memory allocation fails.
+const ACE_TCHAR *
+ACE_URL_Addr::addr_to_string (int ipaddr_format) const
+{
+  // The cache members are modified from this const method.
+  ACE_URL_Addr *this_ptr = const_cast<ACE_URL_Addr *> (this);
+
+  // Look the host up once; it is needed both for sizing and printing.
+  const char *host = ipaddr_format == 0
+    ? this->get_host_name ()
+    : this->get_host_addr ();
+
+  // Never hand a null pointer to strlen()/sprintf().
+  const ACE_TCHAR *path =
+    this->path_name_ != 0 ? this->path_name_ : ACE_TEXT ("");
+
+  if (host == 0)
+    return 0;
+
+  // Worst-case length, counted in characters.
+  const size_t size =
+    ACE_OS::strlen (host)
+    + ACE_OS::strlen ("65536") // Assume the max port number.
+    + ACE_OS::strlen (path)
+    + sizeof (':')
+    + sizeof ('/')
+    + sizeof ('\0'); // For trailing '\0'.
+
+  if (this->addr_string_ == 0 || size > this->addr_string_len_)
+    {
+      // Grow through a temporary: if realloc() fails the old buffer
+      // stays owned by <addr_string_> and the recorded length stays
+      // correct.  realloc() takes bytes, so scale by the character
+      // size.
+      ACE_TCHAR *grown = 0;
+      ACE_ALLOCATOR_RETURN (grown,
+                            (ACE_TCHAR *) ACE_OS::realloc ((void *) this->addr_string_,
+                                                           size * sizeof (ACE_TCHAR)),
+                            0);
+      this_ptr->addr_string_ = grown;
+      this_ptr->addr_string_len_ = size;
+    }
+
+  ACE_OS::sprintf (this->addr_string_,
+                   ACE_TEXT ("%s:%d/%s"),
+                   ACE_TEXT_CHAR_TO_TCHAR (host),
+                   this->get_port_number (),
+                   path);
+  return this->addr_string_;
+}
+
+// Initialize this address from <s>, which has the form
+// "host[:port][/path]".  Without a ':' the port defaults to
+// <ACE_DEFAULT_HTTP_PORT>; without a (non-empty) path the path name
+// defaults to "index.html".  Returns -1 if <s> is null or memory
+// cannot be allocated, otherwise the result of <ACE_INET_Addr::set>.
+int
+ACE_URL_Addr::string_to_addr (const ACE_TCHAR *s)
+{
+  if (s == 0)
+    return -1;
+
+  int result;
+  ACE_TCHAR *t;
+
+  // Need to make a duplicate since we'll be overwriting the string.
+  ACE_ALLOCATOR_RETURN (t,
+                        ACE_OS::strdup (s),
+                        -1);
+
+  // First split off the path_name.
+
+  ACE_TCHAR *path_name = ACE_OS::strchr (t, ACE_TEXT ('/'));
+  const ACE_TCHAR *name = ACE_TEXT ("index.html");
+  if (path_name != 0)
+    {
+      // Skip over '/'.
+      if (ACE_OS::strlen (path_name + 1) > 0)
+        name = path_name + 1;
+
+      *path_name = '\0';
+    }
+
+  // Duplicate the new path before touching the old one, and release
+  // the scratch copy if the duplication fails.
+  ACE_TCHAR *new_path = ACE_OS::strdup (name);
+  if (new_path == 0)
+    {
+      ACE_OS::free (ACE_MALLOC_T (t));
+      errno = ENOMEM;
+      return -1;
+    }
+
+  // Release any path left over from a previous initialization.
+  ACE_OS::free (ACE_MALLOC_T (this->path_name_));
+  this->path_name_ = new_path;
+
+  // Now handle the host address and port number.
+  ACE_TCHAR *port_number = ACE_OS::strchr (t, ':');
+
+  if (port_number == 0)
+    {
+      // Assume it's an ip-address or ip-number.
+      result = this->ACE_INET_Addr::set (ACE_DEFAULT_HTTP_PORT,
+                                         t);
+    }
+  else
+    {
+      *port_number = '\0';
+      u_short port = (u_short) ACE_OS::atoi (port_number + 1); // Skip over ':'
+      result = this->ACE_INET_Addr::set (port, t);
+    }
+
+  ACE_OS::free (ACE_MALLOC_T (t));
+  return result;
+}
+
+// Copy constructor: start out empty, then let <set> do the copying.
+// A failing <set> is only logged.
+ACE_URL_Addr::ACE_URL_Addr (const ACE_URL_Addr &addr)
+  : ACE_INET_Addr (),
+    path_name_ (0),
+    addr_string_ (0),
+    addr_string_len_ (0)
+{
+  const int status = this->set (addr);
+
+  if (status == -1)
+    ACE_ERROR ((LM_ERROR,
+                ACE_TEXT ("%p\n"),
+                ACE_TEXT ("ACE_URL_Addr::ACE_URL_Addr")));
+}
+
+// Make this object a copy of <addr>.  Returns 0 on success and -1 if
+// memory cannot be allocated or <ACE_INET_Addr::set> fails; on
+// failure the path name of this object is left untouched.
+int
+ACE_URL_Addr::set (const ACE_URL_Addr &addr)
+{
+  // Self-assignment: nothing to do (and freeing our own strings
+  // before copying them would read freed memory).
+  if (this == &addr)
+    return 0;
+
+  // Duplicate the path first so that a failure leaves us unchanged.
+  ACE_TCHAR *new_path = 0;
+  if (addr.path_name_ != 0)
+    ACE_ALLOCATOR_RETURN (new_path,
+                          ACE_OS::strdup (addr.path_name_),
+                          -1);
+
+  if (this->ACE_INET_Addr::set (addr) == -1)
+    {
+      ACE_OS::free (reinterpret_cast<void *> (new_path));
+      return -1;
+    }
+
+  ACE_OS::free (reinterpret_cast<void *> (this->path_name_));
+  this->path_name_ = new_path;
+
+  // Drop our cached address string instead of copying <addr>'s: the
+  // cache is regenerated by <addr_to_string> on every call, and
+  // pairing a strdup()'ed copy with <addr>'s (possibly larger)
+  // recorded length would let a later sprintf() overrun the buffer.
+  ACE_OS::free (reinterpret_cast<void *> (this->addr_string_));
+  this->addr_string_ = 0;
+  this->addr_string_len_ = 0;
+
+  return 0;
+}
+
+// Assignment operator.  Self-assignment is a no-op; a failing <set>
+// is only logged since there is no return value to report it with.
+void
+ACE_URL_Addr::operator= (const ACE_URL_Addr &addr)
+{
+  if (this == &addr)
+    return;
+
+  if (this->set (addr) == -1)
+    ACE_ERROR ((LM_ERROR,
+                ACE_TEXT ("%p\n"),
+                ACE_TEXT ("ACE_URL_Addr::operator=")));
+}
+
+// Hash value: the <ACE_INET_Addr> hash plus a hash of the path name.
+u_long
+ACE_URL_Addr::hash (void) const
+{
+  const u_long inet_hash = this->ACE_INET_Addr::hash ();
+  return inet_hash + ACE::hash_pjw (this->get_path_name ());
+}
+
+// Two addresses are equal when path name, port number and IP address
+// all match (checked in that order).
+bool
+ACE_URL_Addr::operator== (const ACE_URL_Addr &addr) const
+{
+  if (ACE_OS::strcmp (addr.get_path_name (),
+                      this->get_path_name ()) != 0)
+    return false;
+
+  if (addr.get_port_number () != this->get_port_number ())
+    return false;
+
+  return addr.get_ip_address () == this->get_ip_address ();
+}
+
+// Inequality is defined as the negation of operator==.
+bool
+ACE_URL_Addr::operator!= (const ACE_URL_Addr &addr) const
+{
+  const bool same = (*this == addr);
+  return !same;
+}
+
+// Build an address from its parts.  <path_name> is duplicated; the
+// address-string cache starts out empty.
+ACE_URL_Addr::ACE_URL_Addr (const ACE_TCHAR *host_name,
+                            const ACE_TCHAR *path_name,
+                            u_short port)
+  : ACE_INET_Addr (port, host_name),
+    path_name_ (ACE_OS::strdup (path_name)),
+    addr_string_ (0), addr_string_len_ (0)
+{
+}
+
+// Path name stored in this address (0 if none has been set).
+const ACE_TCHAR *
+ACE_URL_Addr::get_path_name (void) const
+{
+  return path_name_;
+}
+
+// Destructor: release the path name and the cached address string.
+ACE_URL_Addr::~ACE_URL_Addr (void)
+{
+  void *path = static_cast<void *> (this->path_name_);
+  void *cached = static_cast<void *> (this->addr_string_);
+
+  ACE_OS::free (path);
+  ACE_OS::free (cached);
+  this->path_name_ = 0;
+}
+
+// Delete this object; it must not be used afterwards.  Always
+// returns 0.
+int
+ACE_URL_Addr::destroy (void)
+{
+  delete this;
+
+  return 0;
+}
diff --git a/ACE/examples/Web_Crawler/URL_Addr.h b/ACE/examples/Web_Crawler/URL_Addr.h
new file mode 100644
index 00000000000..9792e1bb390
--- /dev/null
+++ b/ACE/examples/Web_Crawler/URL_Addr.h
@@ -0,0 +1,111 @@
+// -*- C++ -*-
+
+// $Id$
+
+// ============================================================================
+//
+// = LIBRARY
+//    examples/Web_Crawler
+//
+// = FILENAME
+//    URL_Addr.h
+//
+// = AUTHOR
+//    Douglas C. Schmidt <schmidt@cs.wustl.edu>
+//
+// ============================================================================
+
+#ifndef ACE_URL_ADDR_H
+#define ACE_URL_ADDR_H
+
+#include "ace/INET_Addr.h"
+
+#if !defined (ACE_LACKS_PRAGMA_ONCE)
+#pragma once
+#endif /* ACE_LACKS_PRAGMA_ONCE */
+
+#include "ace/ACE.h"
+
+class ACE_URL_Addr : public ACE_INET_Addr
+{
+  // = TITLE
+  //    Defines a URL address family address format.
+  //
+  // = DESCRIPTION
+  //    Extends <ACE_INET_Addr> with a path name, so that an address
+  //    has the form "host:port/path-name".
+public:
+  // = Initialization and termination methods.
+  ACE_URL_Addr (void);
+  // Constructor.
+
+  ACE_URL_Addr (const ACE_TCHAR *host_name,
+                const ACE_TCHAR *path_name,
+                u_short port = ACE_DEFAULT_HTTP_PORT);
+  // Construct from a host name, a path name (which is duplicated)
+  // and a port number.
+
+  ACE_URL_Addr (const ACE_URL_Addr &addr);
+  // Copy constructor.
+
+  int set (const ACE_URL_Addr &addr);
+  // Essentially the copy constructor.  Returns -1 on failure, else 0.
+
+  virtual int string_to_addr (const ACE_TCHAR *address);
+  // Initializes an <ACE_URL_Addr> from the <address>, which can be
+  // "ip-name:port-number/path-name" (e.g.,
+  // "www.cs.wustl.edu:1234/~schmidt/") or
+  // "ip-number:port-number/path-name" (e.g.,
+  // "128.252.166.57:1234/~schmidt").  If there is no ':' in the
+  // <address> it is assumed to be an ip-name or ip-number with the
+  // port number <ACE_DEFAULT_HTTP_PORT>.  If there is no path name
+  // after the '/', "index.html" is used.
+
+  virtual int addr_to_string (ACE_TCHAR *s,
+                              size_t size,
+                              int ipaddr_format = 1) const;
+  // Transform the current <ACE_URL_Addr> address into string format.
+  // If <ipaddr_format> is non-0 this produces
+  // "ip-number:port-number/path-name" (e.g.,
+  // "128.252.166.57:80/~schmidt/"), whereas if <ipaddr_format> is 0
+  // this produces "ip-name:port-number/path-name" (e.g.,
+  // "www.cs.wustl.edu:80/~schmidt/").  Returns -1 if the <size> of
+  // the buffer <s> is too small, else 0.
+
+  virtual const ACE_TCHAR *addr_to_string (int ipaddr_format = 1) const;
+  // Transform the current <ACE_URL_Addr> address into string format.
+  // If <ipaddr_format> is non-0 this produces
+  // "ip-number:port-number/path-name" (e.g.,
+  // "128.252.166.57:80/~schmidt/"), whereas if <ipaddr_format> is 0
+  // this produces "ip-name:port-number/path-name" (e.g.,
+  // "www.cs.wustl.edu:80/~schmidt/").  Uses dynamic memory, which
+  // is allocated on demand and deallocated when the object is
+  // destroyed.  Returns 0 if dynamic memory allocation fails, else a
+  // pointer to the internally owned string.
+
+  void operator= (const ACE_URL_Addr &addr);
+  // Assignment operator.
+
+  ~ACE_URL_Addr (void);
+  // Destructor.
+
+  bool operator == (const ACE_URL_Addr &SAP) const;
+  // Compare two addresses for equality.  The addresses are considered
+  // equal if they contain the same IP address, port number, and path
+  // name.
+
+  bool operator != (const ACE_URL_Addr &SAP) const;
+  // Compare two addresses for inequality.
+
+  virtual u_long hash (void) const;
+  // Computes and returns hash value, which combines the
+  // <ACE_INET_Addr> hash with a hash of the path name.
+
+  const ACE_TCHAR *get_path_name (void) const;
+  // Return the path name.
+
+  int destroy (void);
+  // Commit suicide (i.e., <delete this>).
+private:
+  ACE_TCHAR *path_name_;
+  // Our path name.
+
+  ACE_TCHAR *addr_string_;
+  // The dynamically allocated address string that's used for the
+  // <addr_to_string> method.
+
+  size_t addr_string_len_;
+  // Current length of the <addr_string_>
+};
+
+#endif /* ACE_URL_ADDR_H */
diff --git a/ACE/examples/Web_Crawler/URL_Status.cpp b/ACE/examples/Web_Crawler/URL_Status.cpp
new file mode 100644
index 00000000000..35a57420593
--- /dev/null
+++ b/ACE/examples/Web_Crawler/URL_Status.cpp
@@ -0,0 +1,40 @@
+/* -*- C++ -*- */
+// $Id$
+
+#include "URL_Status.h"
+
+ACE_RCSID(Web_Crawler, URL_Status, "$Id$")
+
+// Construct a status holding <code>.
+URL_Status::URL_Status (STATUS_CODE code) : status_ (code)
+{
+}
+
+// Copy constructor: take over the status code of <s>.
+URL_Status::URL_Status (const URL_Status &s) : status_ (s.status_)
+{
+}
+
+// Current status code.
+URL_Status::STATUS_CODE
+URL_Status::status (void) const
+{
+  return status_;
+}
+
+// Set the status from a plain integer, which is converted to a
+// <STATUS_CODE> without any range checking.
+void
+URL_Status::status (int s)
+{
+  status_ = static_cast<URL_Status::STATUS_CODE> (s);
+}
+
+// Set the status code.
+void
+URL_Status::status (URL_Status::STATUS_CODE s)
+{
+  status_ = s;
+}
+
+// Delete this object; it must not be used afterwards.  Always
+// returns 0.
+int
+URL_Status::destroy (void)
+{
+  delete this;
+
+  return 0;
+}
diff --git a/ACE/examples/Web_Crawler/URL_Status.h b/ACE/examples/Web_Crawler/URL_Status.h
new file mode 100644
index 00000000000..672c5e4f240
--- /dev/null
+++ b/ACE/examples/Web_Crawler/URL_Status.h
@@ -0,0 +1,61 @@
+/* -*- C++ -*- */
+// $Id$
+
+// ============================================================================
+//
+// = LIBRARY
+//    examples/Web_Crawler
+//
+// = FILENAME
+//    URL_Status.h
+//
+// = AUTHOR
+//    Douglas C. Schmidt <schmidt@cs.wustl.edu>
+//
+// ============================================================================
+
+#ifndef _URL_STATUS_H
+#define _URL_STATUS_H
+
+#include "ace/config-all.h"
+
+#if !defined (ACE_LACKS_PRAGMA_ONCE)
+#pragma once
+#endif /* ACE_LACKS_PRAGMA_ONCE */
+
+class URL_Status
+{
+  // = TITLE
+  //    Holds the status code of a reply for a URL.
+public:
+  enum STATUS_CODE
+  {
+    // Numeric status codes; the 2xx-5xx values match the HTTP reply
+    // codes of the same names.
+    STATUS_OK = 200,
+    STATUS_CREATED = 201,
+    STATUS_ACCEPTED = 202,
+    STATUS_NO_CONTENT = 204,
+    STATUS_MOVED_PERMANENTLY = 301,
+    STATUS_MOVED_TEMPORARILY = 302,
+    STATUS_NOT_MODIFIED = 304,
+    STATUS_BAD_REQUEST = 400,
+    STATUS_UNAUTHORIZED = 401,
+    STATUS_FORBIDDEN = 403,
+    STATUS_ITEM_NOT_FOUND = 404,
+    STATUS_INTERNAL_SERVER_ERROR = 500,
+    STATUS_OP_NOT_IMPLEMENTED = 501,
+    STATUS_BAD_GATEWAY = 502,
+    STATUS_SERVICE_UNAVAILABLE = 503,
+    // Value used by the default constructor below.
+    STATUS_INSUFFICIENT_DATA = 399
+  };
+
+  // = Initialization.
+  URL_Status (STATUS_CODE = STATUS_INSUFFICIENT_DATA);
+  URL_Status (const URL_Status &);
+
+  // = Get/set the status code.
+  STATUS_CODE status (void) const;
+  void status (int);
+  void status (STATUS_CODE);
+  int destroy (void);
+  // Commit suicide (i.e., <delete this>).
+private:
+  STATUS_CODE status_;  
+  // The status code.
+};
+
+#endif /* _URL_STATUS_H */
diff --git a/ACE/examples/Web_Crawler/URL_Visitor.cpp b/ACE/examples/Web_Crawler/URL_Visitor.cpp
new file mode 100644
index 00000000000..481a7140089
--- /dev/null
+++ b/ACE/examples/Web_Crawler/URL_Visitor.cpp
@@ -0,0 +1,543 @@
+// $Id$
+
+#include "ace/OS_NS_string.h"
+#include "URL_Visitor.h"
+#include "Command_Processor.h"
+
+ACE_RCSID(Web_Crawler, URL_Visitor, "$Id$")
+
+// Remember the URL to process and the iterator used to walk it.
+URL_Processing_Strategy::URL_Processing_Strategy (URL &url,
+                                                  URL_Iterator &iterator)
+  : url_ (url), iterator_ (iterator)
+{
+}
+
+// Destructor.
+URL_Processing_Strategy::~URL_Processing_Strategy (void)
+{
+  // Nothing to do here.
+}
+
+// Delete this object; it must not be used afterwards.  Always
+// returns 0.
+int
+URL_Processing_Strategy::destroy (void)
+{
+  delete this;
+
+  return 0;
+}
+
+// Forward both arguments to the base class.
+URL_Download_Strategy::URL_Download_Strategy (URL &url,
+                                              URL_Iterator &iterator)
+  : URL_Processing_Strategy (url,
+                             iterator)
+{
+}
+
+// Pull chunks from the iterator until it returns 0, logging each
+// chunk as it arrives.  Always returns 0.
+int
+URL_Download_Strategy::execute (void)
+{
+  for (ACE_CString chunk;
+       this->iterator_.next (chunk) != 0;
+       )
+    ACE_DEBUG ((LM_DEBUG,
+                "%s",
+                chunk.c_str ()));
+
+  return 0;
+}
+
+// Forward both arguments to the base class.
+HTTP_Header_Processing_Strategy::HTTP_Header_Processing_Strategy (URL &url,
+                                                                  URL_Iterator &iterator)
+  : URL_Processing_Strategy (url,
+                             iterator)
+{
+}
+
+// Walk the header lines delivered by the iterator.  The first line
+// is treated as the status line: its status code is stored as the
+// reply status of the URL, and anything other than 200 (including a
+// line without "HTTP") makes this method return -1.  On the remaining
+// lines a "text/html" match sets the content type of the URL.
+// Returns 0 once the iterator is exhausted.
+int
+HTTP_Header_Processing_Strategy::execute (void)
+{
+  // Set the get() position.  Necessary since later a peek is done.
+  if (this->url_.stream ().get_char () == 0)
+    ACE_ERROR_RETURN ((LM_ERROR,
+                       "%p\n","Header Not Found"),
+                      -1);
+
+  // Start from an empty string; the iterator fills it in.
+  ACE_CString line;
+
+  // Get the lines in the header iteratively and check for status info.
+  int i = 0;
+  for (int result = this->iterator_.next (line);
+       result > 0;
+       ++i, result = this->iterator_.next (line))
+    {
+      if (i == 0)
+        {
+          // The status stays at its default (not 200) unless a status
+          // code can actually be located on this line.
+          URL_Status url_status;
+
+          // Assuming that the status-no is a space away.
+          const int status_index = line.find ("HTTP", 0);
+          if (status_index >= 0)
+            {
+              ACE_CString status = line.substring (status_index + 9, //HTTP/1.1 200
+                                                   3);
+              url_status.status (ACE_OS::atoi (status.c_str ()));
+            }
+
+          this->url_.reply_status (url_status);
+
+          // Invalid url.
+          if (url_status.status () != 200)
+            return -1;
+        }
+      else if (line.find ("text/html") != ACE_CString::npos)
+        {
+          ACE_CString url_content_type ("text/html");
+          this->url_.content_type (url_content_type);
+        }
+    }
+
+  return 0;
+}
+
+// Forward <url> and <iterator> to the base class and keep a
+// reference to the visitor <context>.
+HTML_Body_Validation_Strategy::HTML_Body_Validation_Strategy (URL &url,
+                                                              URL_Iterator &iterator,
+                                                              URL_Validation_Visitor &context)
+  : URL_Processing_Strategy (url,
+                             iterator),
+    visitor_context_ (context)
+{
+}
+
+// For every link delivered by the iterator, build a "host/path"
+// string (resolving relative links against the directory of the
+// current URL), turn it into an <ACE_URL_Addr> and queue a
+// <URL_Command> for it on the global <Command_Processor>.  Links
+// whose address cannot be parsed are skipped.  Returns 0 when all
+// links have been processed and -1 if memory cannot be allocated.
+int
+HTML_Body_Validation_Strategy::execute (void)
+{
+  ACE_CString host_name (this->url_.url_addr ().get_host_name ());
+
+  // All to facilitate relative paths: reduce the path of the current
+  // URL to its directory part.
+  ACE_CString prev_location
+    (ACE_TEXT_ALWAYS_CHAR (this->url_.url_addr ().get_path_name ()));
+  int index = prev_location.rfind ('/', prev_location.length ());
+  prev_location = prev_location.substring (0, index + 1);
+
+  // Note: prev_location always ends with '/'
+  if (prev_location[0] != '/')
+    prev_location = "/" + prev_location;
+
+  // Build the url portion which can be attached to the relative paths.
+  prev_location = host_name + prev_location;
+
+  ACE_CString url;
+
+  while (this->iterator_.next (url) > 0)
+    {
+      // Check for relative urls.  Strip out "http://" if it's there.
+      if (url.find ("http") == url.npos)
+        {
+          // Resolve against a per-link copy so that a "../" in one
+          // link does not change the base used for the links that
+          // follow it.
+          ACE_CString base (prev_location);
+
+          if (url.length () >= 2 && url[0] == '.' && url[1] == '.')
+            {
+              // Drop the leading "../" and move the base up one
+              // directory (staying put if there is no parent).
+              url = url.substring (3);
+              int i = base.rfind ('/', base.length () - 1);
+              if (i >= 0)
+                base = base.substring (0, i + 1);
+            }
+          if (url.length () >= 2 && url[0] == '.' && url[1] == '/')
+            url = url.substring (2);
+
+          url = base + url;
+        }
+      else
+        // Skip over the 7 characters of "http://".
+        url = url.substring (7);
+
+      // Double slash at the end works!e.g www.cs.wustl.edu/~kirthika//
+      if (url.find (".html") == url.npos)
+        url = url + "/";
+
+      // Create the new URL address.
+      ACE_URL_Addr *url_addr = 0;
+      ACE_NEW_RETURN (url_addr,
+                      ACE_URL_Addr,
+                      -1);
+      Auto_Destroyer<ACE_URL_Addr> url_addr_ptr (url_addr);
+      if (url_addr_ptr->string_to_addr (ACE_TEXT_CHAR_TO_TCHAR (url.c_str ())) == 0)
+        {
+          HTTP_URL *http_url = 0;
+          ACE_NEW_RETURN (http_url,
+                          HTTP_URL (**url_addr_ptr,
+                                    dynamic_cast<HTTP_URL *> (&this->url_)),
+                          -1);
+          // NOTE(review): <http_url> is not released if the next
+          // allocation fails -- confirm how HTTP_URL objects are meant
+          // to be destroyed before adding cleanup here.
+          URL_Command *url_command = 0;
+          ACE_NEW_RETURN (url_command,
+                          URL_Command (http_url),
+                          -1);
+
+          OPTIONS::instance ()->command_processor ()->insert (url_command);
+        }
+    }
+  return 0;
+}
+
+// Allocate an <HTTP_Header_Iterator> for our URL; returns 0 if the
+// allocation fails.
+URL_Iterator *
+URL_Validation_Visitation_Strategy_Factory::make_header_iterator (void)
+{
+  URL_Iterator *header_iterator = 0;
+  ACE_NEW_RETURN (header_iterator,
+                  HTTP_Header_Iterator (*this->url_),
+                  0);
+
+  return header_iterator;
+}
+
+// Allocate an <HTML_Body_Iterator> for our URL; returns 0 if the
+// allocation fails.
+URL_Iterator *
+URL_Validation_Visitation_Strategy_Factory::make_body_iterator (void)
+{
+  URL_Iterator *body_iterator = 0;
+  ACE_NEW_RETURN (body_iterator,
+                  HTML_Body_Iterator (*this->url_),
+                  0);
+
+  return body_iterator;
+}
+
+// Allocate an <HTTP_Header_Processing_Strategy> that processes our
+// URL through <iterator>; returns 0 if the allocation fails.
+URL_Processing_Strategy *
+URL_Validation_Visitation_Strategy_Factory::make_header_strategy (URL_Iterator &iterator)
+{
+  URL_Processing_Strategy *header_strategy = 0;
+  ACE_NEW_RETURN (header_strategy,
+                  HTTP_Header_Processing_Strategy (*this->url_,
+                                                   iterator),
+                  0);
+
+  return header_strategy;
+}
+
+// Allocate an <HTML_Body_Validation_Strategy> that processes our URL
+// through <iterator> on behalf of the visitor context; returns 0 if
+// the allocation fails.
+URL_Processing_Strategy *
+URL_Validation_Visitation_Strategy_Factory::make_body_strategy (URL_Iterator &iterator)
+{
+  URL_Processing_Strategy *body_strategy = 0;
+  ACE_NEW_RETURN (body_strategy,
+                  HTML_Body_Validation_Strategy (*this->url_,
+                                                 iterator,
+                                                 this->visitor_context_),
+                  0);
+
+  return body_strategy;
+}
+
+// Delete this object; it must not be used afterwards.  Always
+// returns 0.
+int
+URL_Validation_Visitation_Strategy_Factory::destroy (void)
+{
+  delete this;
+
+  return 0;
+}
+
+// Destructor.
+URL_Visitor::~URL_Visitor (void)
+{
+  // Nothing to do here.
+}
+
+// Create the caching connect strategy and the strategy connector
+// that uses it.  If either allocation fails the failure is logged
+// and the corresponding pointer is left null.
+URL_Validation_Visitor::URL_Validation_Visitor (void)
+{
+  // Null both pointers up front so that the destructor and <destroy>
+  // never see indeterminate values if an allocation below fails.
+  this->caching_connect_strategy_ = 0;
+  this->strat_connector_ = 0;
+
+  // The NORETURN variants are used so that the failure checks below
+  // are actually reachable.
+  ACE_NEW_NORETURN (this->caching_connect_strategy_,
+                    CACHED_CONNECT_STRATEGY (this->caching_strategy_));
+  if (this->caching_connect_strategy_ == 0)
+    {
+      ACE_ERROR ((LM_ERROR,
+                  "%p\n",
+                  "caching connect strategy creation failed"));
+      return;
+    }
+
+  ACE_NEW_NORETURN (this->strat_connector_,
+                    STRATEGY_CONNECTOR(0,
+                                       &creation_strategy_,
+                                       caching_connect_strategy_,
+                                       &activation_strategy_));
+  if (this->strat_connector_ == 0)
+    ACE_ERROR ((LM_ERROR,
+                "%p\n",
+                "strategy connector creation failed"));
+}
+
+// Destructor: forget the connector pointer (without deleting it
+// here) and release the caching connect strategy.
+URL_Validation_Visitor::~URL_Validation_Visitor (void)
+{
+  this->strat_connector_ = 0;
+
+  // Deleting a null pointer is a no-op, so no explicit check is needed.
+  delete this->caching_connect_strategy_;
+}
+
+// Access the cache that maps URL addresses to reply statuses.
+URL_Validation_Visitor::URL_CACHE &
+URL_Validation_Visitor::url_cache (void)
+{
+  return url_cache_;
+}
+
+// Look <url_addr> up in the URL cache.  Returns 0 if it is not
+// cached, 1 if it is cached with status 200, and -1 if it is cached
+// with any other status.  Cache hits are logged.
+int
+URL_Validation_Visitor::in_cache (const ACE_URL_Addr &url_addr)
+{
+  URL_Status cached_status (URL_Status::STATUS_CODE (1));
+
+  // Not in the cache.
+  if (this->url_cache_.find (url_addr, cached_status) != 0)
+    return 0;
+
+  ACE_DEBUG ((LM_DEBUG,
+              "status %d for URL %s (cached)\n",
+              cached_status.status (),
+              url_addr.addr_to_string (0)));
+
+  // Anything but 200 counts as an invalid status.
+  return cached_status.status () != 200 ? -1 : 1;
+}
+
+URL_Visitation_Strategy_Factory *
+URL_Validation_Visitor::make_visitation_strategy_factory (URL &url)
+{
+  // Since this is HTTP 1.1 we'll need to establish a connection
+  // only once. Trying for relative paths.
+  //
+  // Returns the new factory, or 0 if opening the stream, sending the
+  // request, or allocating the factory fails.
+  if (url.stream ().open (this->strat_connector_, url.url_addr ()) == -1)
+    return 0;
+
+  // See if we can get connected and send the GET request via the
+  // <HTTP_URL>.
+  if (url.send_request () == -1)
+    {
+      ACE_ERROR ((LM_ERROR,
+                  "%p\n",
+                  "send_request"));
+
+      // Remember the failure by caching a "service unavailable"
+      // status for this address.
+      if (this->url_cache_.bind (url.url_addr (),
+                                 URL_Status (URL_Status::STATUS_SERVICE_UNAVAILABLE)) == -1)
+        ACE_ERROR ((LM_ERROR,
+                    "%p\n",
+                    "bind"));
+
+      return 0;
+    }
+
+  // @@ Here's where we could check to see if the <url> was HTTP or
+  // FTP, etc.  But for now we'll just assume that everything is an
+  // HTTP URL.
+  URL_Visitation_Strategy_Factory *factory;
+  ACE_NEW_RETURN (factory,
+                  URL_Validation_Visitation_Strategy_Factory (&url, *this),
+                  0);
+  return factory;
+}
+
+int
+URL_Validation_Visitor::destroy (void)
+{
+  delete this->strat_connector_;  // The destructor only clears this pointer, so it is freed here.
+  // Commit suicide: the object must not be used after this call.
+  delete this;
+  return 0;
+}
+
+int
+URL_Validation_Visitor::visit (HTTP_URL &http_url)
+{
+  int result = this->in_cache (http_url.url_addr ());  // 0 means this URL has no cached status yet.
+  if (result == 0)
+    {
+      Auto_Destroyer <URL_Visitation_Strategy_Factory> vs (this->make_visitation_strategy_factory (http_url));
+
+      if (*vs == 0)
+        ACE_ERROR_RETURN ((LM_ERROR,
+                           "%p\n",
+                           "make_visitation_strategy_factory"),
+                          -1);
+      // Header phase: make the header iterator and strategy, then run the strategy.
+      Auto_Destroyer <URL_Iterator> ihs (vs->make_header_iterator ());
+      if (*ihs == 0)
+        ACE_ERROR_RETURN ((LM_ERROR,
+                           "%p\n",
+                           "make_header_iterator"),
+                          -1);
+      Auto_Destroyer <URL_Processing_Strategy> phs (vs->make_header_strategy (**ihs));
+      if (*phs == 0)
+        ACE_ERROR_RETURN ((LM_ERROR,
+                           "%p\n",
+                           "make_header_strategy"),
+                          -1);
+      int phs_result = phs->execute ();
+      if (phs_result == -1)
+        ACE_DEBUG ((LM_DEBUG,
+                    "Invalid "));
+
+      ACE_DEBUG ((LM_DEBUG,
+                  "URL with status %d %s\n",
+                  http_url.reply_status ().status (),
+                  http_url.url_addr().addr_to_string (0)));
+
+      // Store the reply status of the HTTP URL in the cache, valid or not.
+      if (this->url_cache ().bind (http_url.url_addr (),
+                                   http_url.reply_status ()) != 0)
+        ACE_ERROR_RETURN ((LM_ERROR,
+                           "%p\n","url_cache.bind"),
+                          -1);
+
+      // Since it is invalid, don't go any further.
+      if (phs_result == -1)
+        return 0;
+
+      // Return if the recurse option isn't set.
+      if (OPTIONS::instance ()->recurse () != 1)
+        return 0;
+      // Body phase: make the body iterator and strategy, then run the strategy.
+      Auto_Destroyer <URL_Iterator> is (vs->make_body_iterator ());
+      if (*is == 0)
+        ACE_ERROR_RETURN ((LM_ERROR,
+                           "%p\n",
+                           "make_body_iterator"),
+                          -1);
+
+      Auto_Destroyer <URL_Processing_Strategy> ps (vs->make_body_strategy (**is));
+      if (*ps == 0)
+        ACE_ERROR_RETURN ((LM_ERROR,
+                           "%p\n",
+                           "make_body_strategy"),
+                          -1);
+
+      if (ps->execute () == -1)
+        ACE_ERROR_RETURN ((LM_ERROR,
+                           "%p\n",
+                           "body execute"),
+                          -1);
+
+    }
+  return 0;  // Also reached, without any work, when the URL already had a cached status.
+}
+
+int
+URL_Download_Visitation_Strategy_Factory::destroy (void)
+{
+  // Commit suicide: the factory deletes itself and always returns 0.
+  delete this;
+  return 0;
+}
+
+URL_Iterator *
+URL_Download_Visitation_Strategy_Factory::make_header_iterator (void)
+{
+  return 0;  // No header iterator is made for downloads.
+}
+
+URL_Iterator *
+URL_Download_Visitation_Strategy_Factory::make_body_iterator (void)
+{
+  // Create the download iterator for the stashed URL; if the
+  // allocation fails ACE_NEW_RETURN makes this method return 0.
+  URL_Iterator *body_iterator;
+  ACE_NEW_RETURN (body_iterator, URL_Download_Iterator (*this->url_), 0);
+  return body_iterator;
+}
+
+URL_Processing_Strategy *
+URL_Download_Visitation_Strategy_Factory::make_header_strategy (URL_Iterator &iterator)
+{
+  // Not implemented yet ("you fill in here"): <iterator> is ignored and 0 is returned.
+  ACE_UNUSED_ARG (iterator);
+
+  return 0;
+}
+
+URL_Processing_Strategy *
+URL_Download_Visitation_Strategy_Factory::make_body_strategy (URL_Iterator &iterator)
+{
+  // Create the download strategy for the stashed URL and <iterator>;
+  // if the allocation fails ACE_NEW_RETURN makes this method return 0.
+  URL_Processing_Strategy *download_strategy;
+  ACE_NEW_RETURN (download_strategy,
+                  URL_Download_Strategy (*this->url_, iterator), 0);
+  return download_strategy;
+}
+
+URL_Visitation_Strategy_Factory::URL_Visitation_Strategy_Factory (URL *url)
+  : url_ (url)
+{ // Only stashes the pointer; nothing else is done with <url> here.
+}
+
+URL_Visitation_Strategy_Factory::~URL_Visitation_Strategy_Factory (void)
+{ // Intentionally empty; in particular <url_> is not deleted.
+}
+
+URL_Download_Visitation_Strategy_Factory::URL_Download_Visitation_Strategy_Factory (URL *url)
+  : URL_Visitation_Strategy_Factory (url)
+{ // All state lives in the base class.
+}
+
+URL_Validation_Visitation_Strategy_Factory::URL_Validation_Visitation_Strategy_Factory (URL *url,
+                                                                                        URL_Validation_Visitor &visitor_context)
+  : URL_Visitation_Strategy_Factory (url),
+    visitor_context_ (visitor_context)
+{ // Keeps a reference to the visitor, which is later passed to the body strategy.
+}
+
+URL_Visitation_Strategy_Factory *
+URL_Download_Visitor::make_visitation_strategy_factory (URL &url)
+{
+  // See if we can get connected and send the GET request via the
+  // <HTTP_URL>.  The number of attempts is bounded so that a request
+  // that keeps failing cannot spin this loop forever; once the limit
+  // is reached the failure is logged and 0 is returned.
+  const int max_attempts = 3;
+  for (int attempt = 1; url.send_request () == -1; ++attempt)
+    {
+      if (attempt >= max_attempts)
+        ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "send_request"), 0);
+    }
+
+  // @@ Here's where we could check to see if the <url> was HTTP or
+  // FTP, etc.  But for now we'll just assume that everything is an
+  // HTTP URL.
+  URL_Visitation_Strategy_Factory *vs;
+  ACE_NEW_RETURN (vs,
+                  URL_Download_Visitation_Strategy_Factory (&url), 0);
+  return vs;
+}
+
+int
+URL_Download_Visitor::destroy (void)
+{
+  // Commit suicide: the visitor deletes itself and always returns 0.
+  delete this;
+  return 0;
+}
+
+int
+URL_Download_Visitor::visit (HTTP_URL &http_url)
+{
+  Auto_Destroyer <URL_Visitation_Strategy_Factory> vs (this->make_visitation_strategy_factory (http_url));
+
+  if (*vs == 0)
+    ACE_ERROR_RETURN ((LM_ERROR,
+                       "%p\n",
+                       "make_visitation_strategy_factory"),
+                      -1);
+  // Each Auto_Destroyer calls <destroy> on its object when it goes out of scope.
+  Auto_Destroyer <URL_Iterator> is (vs->make_body_iterator ());
+  if (*is == 0)
+    ACE_ERROR_RETURN ((LM_ERROR,
+                       "%p\n",
+                       "make_body_iterator"),
+                      -1);
+  // The body strategy is given the iterator created above.
+  Auto_Destroyer <URL_Processing_Strategy> ps (vs->make_body_strategy (**is));
+  if (*ps == 0)
+    ACE_ERROR_RETURN ((LM_ERROR,
+                       "%p\n",
+                       "make_body_strategy"),
+                      -1);
+  // Run the body strategy; -1 from <execute> is reported as a failure.
+  if (ps->execute () == -1)
+    ACE_ERROR_RETURN ((LM_ERROR,
+                       "%p\n",
+                       "body execute"),
+                      -1);
+  return 0;
+}
diff --git a/ACE/examples/Web_Crawler/URL_Visitor.h b/ACE/examples/Web_Crawler/URL_Visitor.h
new file mode 100644
index 00000000000..9f68612d629
--- /dev/null
+++ b/ACE/examples/Web_Crawler/URL_Visitor.h
@@ -0,0 +1,436 @@
+/* -*- C++ -*- */
+// $Id$
+
+// ============================================================================
+//
+// = LIBRARY
+//    examples/Web_Crawler
+//
+// = FILENAME
+//    URL_Visitor.h
+//
+// = AUTHOR
+//    Douglas C.Schmidt <schmidt@cs.wustl.edu>
+//    Kirthika Parameswaran <kirthika@cs.wustl.edu>
+// ============================================================================
+
+#ifndef _URL_VISITOR_H
+#define _URL_VISITOR_H
+#include /**/ "ace/pre.h"
+
+#include "ace/Strategies_T.h"
+
+#if !defined (ACE_LACKS_PRAGMA_ONCE)
+#pragma once
+#endif /* ACE_LACKS_PRAGMA_ONCE */
+
+
+#include "HTTP_URL.h"
+#include "Iterators.h"
+#include "ace/Hash_Map_Manager_T.h"
+#include "ace/Caching_Strategies_T.h"
+#include "ace/Cached_Connect_Strategy_T.h"
+#include "Options.h"
+#include "ace/Pair_T.h"
+
+// Forward declarations.
+class URL_Validation_Visitor;
+
+class URL_Processing_Strategy
+{
+  // = TITLE
+  //   Abstract base class for the URL processing strategy.
+  // = DESCRIPTION
+  //   Holds references to the URL and its iterator; subclasses implement <execute>.
+public:
+  URL_Processing_Strategy (URL &,
+                           URL_Iterator &);
+  // Constructor.
+
+  virtual ~URL_Processing_Strategy (void);
+
+  virtual int execute (void) = 0;
+  // Perform the strategy.
+
+  virtual int destroy (void);
+  // Close down the resources.
+
+
+protected:
+  URL &url_;
+  // A reference to the URL "context" that we're processing.
+
+  URL_Iterator &iterator_;
+  // Iterator for the URL that we're processing.
+};
+
+class HTTP_Header_Processing_Strategy : public URL_Processing_Strategy
+{
+  // = TITLE
+  //   Defines the HTTP header processing strategy.
+  // = DESCRIPTION
+  //   Concrete <URL_Processing_Strategy> whose <execute> processes an HTTP header.
+public:
+  HTTP_Header_Processing_Strategy (URL &,
+                                   URL_Iterator &);
+  // Constructor.
+
+  virtual int execute (void);
+  // Perform the strategy for processing an HTTP header.
+};
+
+class HTML_Body_Validation_Strategy : public URL_Processing_Strategy
+{
+  // = TITLE
+  //   Defines the HTML body processing strategy.
+  //
+  // = DESCRIPTION
+  //   This class iterates through the body of an HTML file and
+  //   recursively visits embedded links.
+public:
+  HTML_Body_Validation_Strategy (URL &,
+                                 URL_Iterator &,
+                                 URL_Validation_Visitor &);
+  // Constructor; the visitor becomes the context of the visit.
+
+  virtual int execute (void);
+  // Perform the strategy for processing an HTML file.  This strategy
+  // iterates over the HTML file and recursively visits embedded links
+  // to process them, as well.
+
+private:
+  URL_Validation_Visitor &visitor_context_;
+  // This is the context of the visit.
+};
+
+class URL_Download_Strategy : public URL_Processing_Strategy
+{
+  // = TITLE
+  //   Defines a URL downloading strategy.
+  //
+  // = DESCRIPTION
+  //   This class downloads a URL's contents into a temporary file.
+public:
+  URL_Download_Strategy (URL &,
+                         URL_Iterator &);
+  // Constructor; takes the URL to download and the iterator over it.
+
+  virtual int execute (void);
+  // Perform the strategy for downloading a URL to a temporary file.
+};
+
+class URL_Visitation_Strategy_Factory
+{
+  // = TITLE
+  //   Abstract Factory for the URL visitation strategy.
+  // = DESCRIPTION
+  //   Makes the header/body iterators and processing strategies for one URL.
+public:
+  URL_Visitation_Strategy_Factory (URL *);
+
+  /// Destructor.
+  virtual ~URL_Visitation_Strategy_Factory (void);
+
+  // = Factory Methods.
+  virtual URL_Iterator *make_header_iterator (void) = 0;
+  // Factory Method that makes the header iterator.
+
+  virtual URL_Iterator *make_body_iterator (void) = 0;
+  // Factory Method that makes the body iterator.
+
+  virtual URL_Processing_Strategy *make_header_strategy (URL_Iterator &) = 0;
+  // Factory Method that makes the header processing strategy.
+
+  virtual URL_Processing_Strategy *make_body_strategy (URL_Iterator &) = 0;
+  // Factory Method that makes the body processing strategy.
+
+  virtual int destroy (void) = 0;
+  // Close down the resources.
+
+protected:
+  URL *url_;
+  // Stash the URL so we don't have to pass it around.
+};
+
+class URL_Download_Visitation_Strategy_Factory : public URL_Visitation_Strategy_Factory
+{
+  // = TITLE
+  //   Concrete Factory for the URL download visitation strategy.
+  // = DESCRIPTION
+  //   Only the body iterator and the body strategy are implemented.
+public:
+  URL_Download_Visitation_Strategy_Factory (URL *);
+  // Constructor.
+
+  // = Factory Methods.
+  virtual URL_Iterator *make_header_iterator (void);
+  // Makes no header iterator for downloads: always returns 0.
+
+  virtual URL_Iterator *make_body_iterator (void);
+  // Factory Method that makes a <URL_Download_Iterator>.
+
+  virtual URL_Processing_Strategy *make_header_strategy (URL_Iterator &);
+  // Makes no header processing strategy yet: always returns 0.
+
+  virtual URL_Processing_Strategy *make_body_strategy (URL_Iterator &);
+  // Factory Method that makes a <URL_Download_Strategy>.
+
+  virtual int destroy (void);
+  // Close down the resources.
+};
+
+class URL_Validation_Visitation_Strategy_Factory : public URL_Visitation_Strategy_Factory
+{
+  // = TITLE
+  //   Concrete Factory for the URL validation visitation strategy.
+  // = DESCRIPTION
+  //   Keeps the <URL_Validation_Visitor> and hands it to the body strategy.
+public:
+  URL_Validation_Visitation_Strategy_Factory (URL *,
+                                              URL_Validation_Visitor &);
+  // Constructor.
+
+  // = Factory Methods.
+  virtual URL_Iterator *make_header_iterator (void);
+  // Factory Method that makes an <HTTP_Header_Iterator>.
+
+  virtual URL_Iterator *make_body_iterator (void);
+  // Factory Method that makes an <HTML_Body_Iterator>.
+
+  virtual URL_Processing_Strategy *make_header_strategy (URL_Iterator &);
+  // Factory Method that makes the header processing strategy.
+
+  virtual URL_Processing_Strategy *make_body_strategy (URL_Iterator &);
+  // Factory Method that makes an <HTML_Body_Validation_Strategy>.
+
+  virtual int destroy (void);
+  // Close down the resources.
+
+private:
+  URL_Validation_Visitor &visitor_context_;
+  // Context of the visitor.
+};
+
+class URL_Visitor
+{
+  // = TITLE
+  //   Base class for the URL Visitor.
+  //
+  // = DESCRIPTION
+  //   This class plays the "visitor" role in the Visitor pattern.
+public:
+
+  virtual ~URL_Visitor (void);
+
+  virtual int visit (HTTP_URL &http_url) = 0;
+  // Visit an <HTTP_URL>.
+
+  // @@ Not supported yet:
+  // virtual int visit (FTP_URL &http_url) = 0;
+
+  virtual int destroy (void) = 0;
+  // Clean up the resources.
+
+protected:
+  virtual URL_Visitation_Strategy_Factory *make_visitation_strategy_factory (URL &) = 0;
+  // Make the appropriate <URL_Visitation_Strategy_Factory>.
+};
+
+typedef int ATTRIBUTES;
+typedef ACE_Svc_Handler <ACE_SOCK_STREAM, ACE_NULL_SYNCH>
+        Client_Svc_Handler;
+typedef ACE_Pair<Client_Svc_Handler *, ATTRIBUTES>
+        CACHED_HANDLER;
+typedef ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>
+        ACE_ADDR;
+typedef ACE_Hash<ACE_ADDR> H_KEY;
+typedef ACE_Equal_To<ACE_ADDR> C_KEYS;
+// Hash map (and its iterators) from <ACE_ADDR> to <CACHED_HANDLER>.
+typedef ACE_Hash_Map_Manager_Ex<ACE_ADDR, CACHED_HANDLER, H_KEY, C_KEYS, ACE_Null_Mutex>
+        HASH_MAP;
+typedef ACE_Hash_Map_Iterator_Ex<ACE_ADDR, CACHED_HANDLER, H_KEY, C_KEYS, ACE_Null_Mutex>
+        HASH_MAP_ITERATOR;
+typedef ACE_Hash_Map_Reverse_Iterator_Ex<ACE_ADDR, CACHED_HANDLER, H_KEY, C_KEYS, ACE_Null_Mutex>
+        HASH_MAP_REVERSE_ITERATOR;
+// Cleanup strategy and caching utility that operate on <HASH_MAP>.
+typedef ACE_Recyclable_Handler_Cleanup_Strategy<ACE_ADDR, CACHED_HANDLER, HASH_MAP>
+        CLEANUP_STRATEGY;
+typedef ACE_Recyclable_Handler_Caching_Utility<ACE_ADDR, CACHED_HANDLER, HASH_MAP, HASH_MAP_ITERATOR, ATTRIBUTES>
+        CACHING_UTILITY;
+
+typedef ACE_LRU_Caching_Strategy<ATTRIBUTES, CACHING_UTILITY>
+        LRU_CACHING_STRATEGY;
+// The caching strategy in use is the LRU one.
+typedef LRU_CACHING_STRATEGY
+        CACHING_STRATEGY;
+
+typedef ACE_Strategy_Connector<Client_Svc_Handler, ACE_SOCK_CONNECTOR>
+        STRATEGY_CONNECTOR;
+
+typedef ACE_NOOP_Creation_Strategy<Client_Svc_Handler>
+        NULL_CREATION_STRATEGY;
+
+typedef ACE_NOOP_Concurrency_Strategy<Client_Svc_Handler>
+        NULL_ACTIVATION_STRATEGY;
+// Connect strategy parameterized with <CACHING_STRATEGY>.
+typedef ACE_Cached_Connect_Strategy_Ex<Client_Svc_Handler, ACE_SOCK_CONNECTOR, CACHING_STRATEGY, ATTRIBUTES, ACE_SYNCH_NULL_MUTEX>
+        CACHED_CONNECT_STRATEGY;
+
+class URL_Validation_Visitor : public URL_Visitor
+{
+  // = TITLE
+  //   Subclass that defines the URL validation visitor.
+  //
+  // = DESCRIPTION
+  //   This class checks to make sure that the <HTTP_URL> is valid.
+  //   If the <HTTP_URL> is an <HTML> file, it can also be used to
+  //   recursively check that all embedded links in this file are
+  //   valid.
+public:
+  typedef ACE_Hash_Map_Manager <ACE_URL_Addr, URL_Status, ACE_Null_Mutex>
+          URL_CACHE;
+
+  virtual int visit (HTTP_URL &http_url);
+  // Visit an <HTTP_URL> to make sure that it's valid.  If the content
+  // type of the <HTTP_URL> is "text/html" and the <recursion> option
+  // is enabled then <visit> recursively checks each link embedded in
+  // the HTML page.
+
+  // @@
+  // virtual int visit (FTP_URL &http_url);
+
+  URL_Validation_Visitor (void);
+  virtual int destroy (void);
+  // Cleanup the resources.
+
+  URL_CACHE &url_cache (void);
+  // Returns a reference to the URL cache.
+
+  // NOTE: the typedefs in the comment block below are disabled.
+  /*
+
+
+  typedef ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH>
+          Svc_Handler;
+  typedef ACE_Strategy_Connector<Svc_Handler, ACE_SOCK_CONNECTOR>
+          STRAT_CONNECTOR;
+  typedef ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>
+          REFCOUNTED_HASH_RECYCLABLE_ADDRESS;
+  typedef ACE_NOOP_Creation_Strategy<Svc_Handler>
+          NULL_CREATION_STRATEGY;
+  typedef ACE_NOOP_Concurrency_Strategy<Svc_Handler>
+          NULL_ACTIVATION_STRATEGY;
+
+  typedef ACE_Hash_Map_Manager_Ex<REFCOUNTED_HASH_RECYCLABLE_ADDRESS,\
+                                  ACE_Pair<Svc_Handler *, int>,\
+                                  ACE_Hash<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>, \
+                                  ACE_Equal_To<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>,\
+                                  ACE_Null_Mutex>
+          CONNECTION_HASH_MAP;
+  typedef ACE_Hash_Map_Iterator_Ex<REFCOUNTED_HASH_RECYCLABLE_ADDRESS,\
+                                  ACE_Pair<Svc_Handler *, int>,\
+                                  ACE_Hash<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>, \
+                                  ACE_Equal_To<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>,\
+                                  ACE_Null_Mutex>
+          CONNECTION_HASH_MAP_ITERATOR;
+  typedef ACE_Hash_Map_Reverse_Iterator_Ex<REFCOUNTED_HASH_RECYCLABLE_ADDRESS,\
+                                  ACE_Pair<Svc_Handler *, int>,\
+                                  ACE_Hash<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>, \
+                                  ACE_Equal_To<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>,\
+                                  ACE_Null_Mutex>
+          CONNECTION_HASH_MAP_REVERSE_ITERATOR;
+  typedef ACE_Pair_Caching_Utility <REFCOUNTED_HASH_RECYCLABLE_ADDRESS, \
+                                            ACE_Pair<Svc_Handler *, int>, \
+                                            CONNECTION_HASH_MAP, CONNECTION_HASH_MAP_ITERATOR, int >
+          CACHING_STRATEGY_UTILITY;
+  typedef ACE_LRU_Caching_Strategy<REFCOUNTED_HASH_RECYCLABLE_ADDRESS,\
+                                   ACE_Pair<Svc_Handler *, int>,\
+                                   CONNECTION_HASH_MAP, int,\
+                                   CACHING_STRATEGY_UTILITY >
+          LRU;
+  typedef ACE_Cached_Connect_Strategy_Ex<Svc_Handler,ACE_SOCK_CONNECTOR, LRU,int, ACE_SYNCH_NULL_MUTEX>
+          CACHED_CONNECT_STRATEGY;
+  */
+protected:
+  virtual ~URL_Validation_Visitor (void);  // Protected: instances delete themselves in <destroy>.
+  virtual URL_Visitation_Strategy_Factory *make_visitation_strategy_factory (URL &);
+  // Factory Method that makes a
+  // <URL_Validation_Visitation_Strategy_Factory>.
+
+  URL_CACHE url_cache_;
+  // Cache the status of URLs we've already validated.
+
+  int in_cache (const ACE_URL_Addr &url_addr);
+  // Check whether a reply status for <url_addr> is cached.  Returns 0
+  // if not, 1 if the cached status is 200, -1 for any other status.
+
+  NULL_CREATION_STRATEGY creation_strategy_;
+  NULL_ACTIVATION_STRATEGY activation_strategy_;
+
+  // Configure the Strategy Connector with a strategy that caches
+  // connection.
+  CACHED_CONNECT_STRATEGY *caching_connect_strategy_;
+
+  STRATEGY_CONNECTOR *strat_connector_;
+  // Connector created in the constructor and deleted in <destroy>.
+  CACHING_STRATEGY caching_strategy_;
+};
+
+
+class URL_Download_Visitor : public URL_Visitor
+{
+  // = TITLE
+  //   Subclass that defines the URL download visitor.
+  //
+  // = DESCRIPTION
+  //   This class runs the download strategy on the <HTTP_URL> it visits.
+public:
+  virtual int visit (HTTP_URL &http_url);
+  // Visit an <HTTP_URL> by sending the request and running the body
+  // (download) strategy over it.  Only the body iterator and body
+  // strategy are used; there is no header phase and no recursion.
+  // Returns 0 on success and -1 on failure.
+
+  // @@
+  // virtual int visit (FTP_URL &http_url);
+
+  virtual int destroy (void);
+  // Cleanup the resources.
+
+protected:
+  URL_Visitation_Strategy_Factory *make_visitation_strategy_factory (URL &);
+  // Factory Method that makes a <URL_Download_Visitation_Strategy_Factory>.
+};
+
+template <class T>
+class Auto_Destroyer
+{
+  // = TITLE
+  //   Simple class that ensures the <destroy> method is called on our
+  //   <URL_*> objects when they go out of scope.
+  //
+  // = DESCRIPTION
+  //   This class is similar to an auto_ptr<> and should be used to
+  //   simplify blocks of code that must create/destroy pointers to
+  //   various <URL_*> related strategies and iterators.  Copying is
+  //   disabled because two copies would both call <destroy>.
+public:
+  Auto_Destroyer (T *t): t_ (t) {}
+  T *operator-> (void) { return this->t_; }
+  T *operator *(void) { return this->t_; }
+  // Note that <operator*> yields the pointer itself, not a reference.
+  void operator= (T *t)
+  {
+    // Re-assigning the pointer already held must not destroy it.
+    if (this->t_ != 0 && this->t_ != t) this->t_->destroy ();
+    this->t_ = t;
+  }
+  ~Auto_Destroyer (void) { if (this->t_ != 0) this->t_->destroy (); }
+private:
+  Auto_Destroyer (const Auto_Destroyer<T> &);
+  void operator= (const Auto_Destroyer<T> &);  // Declared, never defined.
+  T *t_;
+};
+
+#include /**/ "ace/post.h"
+#endif /* _URL_VISITOR_H */
diff --git a/ACE/examples/Web_Crawler/URL_Visitor_Factory.cpp b/ACE/examples/Web_Crawler/URL_Visitor_Factory.cpp
new file mode 100644
index 00000000000..1b8a316b219
--- /dev/null
+++ b/ACE/examples/Web_Crawler/URL_Visitor_Factory.cpp
@@ -0,0 +1,53 @@
+/* -*- C++ -*- */
+// $Id$
+
+#include "URL_Visitor_Factory.h"
+
+ACE_RCSID (Web_Crawler,
+           URL_Visitor_Factory,
+           "$Id$")
+
+
+URL_Visitor_Factory::~URL_Visitor_Factory (void)
+{ // Intentionally empty.
+}
+
+URL_Visitor *
+URL_Validation_Visitor_Factory::make_visitor (void)
+{
+  // Hand back a freshly allocated validation visitor.  If the
+  // allocation fails ACE_NEW_RETURN makes this method return 0.
+  URL_Visitor *visitor;
+  ACE_NEW_RETURN (visitor, URL_Validation_Visitor, 0);
+
+  // The visitor is released through its <destroy> method.
+  return visitor;
+}
+
+Command_Processor *
+URL_Validation_Visitor_Factory::make_command_processor (void)
+{
+  // Validation uses a plain <Command_Processor>.  If the allocation
+  // fails ACE_NEW_RETURN makes this method return 0.
+  Command_Processor *processor;
+  ACE_NEW_RETURN (processor, Command_Processor, 0);
+
+  return processor;
+}
+
+URL_Visitor *
+URL_Download_Visitor_Factory::make_visitor (void)
+{
+  // Hand back a freshly allocated download visitor.  If the
+  // allocation fails ACE_NEW_RETURN makes this method return 0.
+  URL_Visitor *visitor;
+  ACE_NEW_RETURN (visitor, URL_Download_Visitor, 0);
+
+  return visitor;
+}
+
+Command_Processor *
+URL_Download_Visitor_Factory::make_command_processor (void)
+{
+  return 0;  // No command processor is made for downloads; Web_Crawler::run treats 0 as an error.
+}
diff --git a/ACE/examples/Web_Crawler/URL_Visitor_Factory.h b/ACE/examples/Web_Crawler/URL_Visitor_Factory.h
new file mode 100644
index 00000000000..9f484afe9f0
--- /dev/null
+++ b/ACE/examples/Web_Crawler/URL_Visitor_Factory.h
@@ -0,0 +1,74 @@
+/* -*- C++ -*- */
+// $Id$
+
+// ============================================================================
+//
+// = LIBRARY
+//    examples/Web_Crawler
+//
+// = FILENAME
+//    URL_Visitor_Factory.h
+//
+// = AUTHOR
+//    Douglas C. Schmidt <schmidt@cs.wustl.edu>
+//
+// ============================================================================
+
+#ifndef _URL_VISITOR_FACTORY_H
+#define _URL_VISITOR_FACTORY_H
+
+#include "URL_Visitor.h"
+#include "Command_Processor.h"
+
+#if !defined (ACE_LACKS_PRAGMA_ONCE)
+#pragma once
+#endif /* ACE_LACKS_PRAGMA_ONCE */
+
+class URL_Visitor_Factory
+{
+  // = TITLE
+  //   Abstract base class that creates URL visitors.
+  //
+  // = DESCRIPTION
+  //   Subclasses define each of the Factory Methods to
+  //   make the right objects, which all "vary" together.
+public:
+
+  /// Destructor.
+  virtual ~URL_Visitor_Factory (void);
+
+  virtual URL_Visitor *make_visitor (void) = 0;
+  // Factory Method that makes the appropriate type of <URL_Visitor>.
+
+  virtual Command_Processor *make_command_processor (void) = 0;
+  // Factory Method that makes the appropriate type of
+  // <Command_Processor>, or returns 0 if the subclass provides none.
+};
+
+class URL_Validation_Visitor_Factory : public URL_Visitor_Factory
+{
+  // = TITLE
+  //   Create a URL visitor that validates URL links.
+public:
+  virtual URL_Visitor *make_visitor (void);
+  // Factory Method that makes a <URL_Validation_Visitor>.
+
+  virtual Command_Processor *make_command_processor (void);
+  // Factory Method that makes a <Command_Processor>.
+
+
+};
+
+class URL_Download_Visitor_Factory : public URL_Visitor_Factory
+{
+  // = TITLE
+  //   Create a URL visitor that downloads URL links.
+public:
+  virtual URL_Visitor *make_visitor (void);
+  // Factory Method that makes a <URL_Download_Visitor>.
+
+  virtual Command_Processor *make_command_processor (void);
+  // Currently makes no <Command_Processor>: always returns 0.
+};
+
+#endif /* _URL_VISITOR_FACTORY_H */
diff --git a/ACE/examples/Web_Crawler/Web_Crawler.cpp b/ACE/examples/Web_Crawler/Web_Crawler.cpp
new file mode 100644
index 00000000000..16639a38d73
--- /dev/null
+++ b/ACE/examples/Web_Crawler/Web_Crawler.cpp
@@ -0,0 +1,95 @@
+// $Id$
+
+#include "Options.h"
+#include "URL_Visitor_Factory.h"
+#include "Web_Crawler.h"
+
+ACE_RCSID(Web_Crawler, Web_Crawler, "$Id$")
+
+Web_Crawler::~Web_Crawler (void)
+{
+  delete this->url_visitor_factory_;  // Still null (a no-op) unless <open> created the factory.
+}
+
+Web_Crawler::Web_Crawler (void)
+  : url_visitor_factory_ (0)
+{ // The factory is created later, by <open>.
+}
+
+int
+Web_Crawler::open (int argc, ACE_TCHAR *argv[])
+{
+  // Bail out early if the command line cannot be parsed.
+  if (OPTIONS::instance ()->parse_args (argc, argv) == -1)
+    return -1;
+
+  // @@ Put the ACE_Service_Config::open() stuff here somewhere...
+
+  // For now just hardcode this to create "validation" visitors.
+  // ACE_NEW_RETURN makes this method return -1 if allocation fails.
+  ACE_NEW_RETURN (this->url_visitor_factory_, URL_Validation_Visitor_Factory, -1);
+
+  return 0;
+}
+
+int
+Web_Crawler::run (void)
+{
+  // Runs one crawl starting from the URL configured in <Options>.
+  // Returns 0 on success and -1 on any failure, including a missing
+  // factory (<open> not called or failed) and allocation failures.
+  if (this->url_visitor_factory_ == 0)
+    ACE_ERROR_RETURN ((LM_ERROR,
+                       "Web_Crawler::run called without a successful open\n"),
+                      -1);
+
+  // Make the appropriate <URL_Visitor>.
+  Auto_Destroyer<URL_Visitor> visitor (this->url_visitor_factory_->make_visitor ());
+  if (*visitor == 0)
+    ACE_ERROR_RETURN ((LM_ERROR,
+                       "%p\n",
+                       "make_visitor"),
+                      -1);
+
+  // Make the appropriate <Command_Processor>.
+  Auto_Destroyer<Command_Processor> cp (this->url_visitor_factory_->make_command_processor ());
+
+  if (*cp == 0)
+    ACE_ERROR_RETURN ((LM_ERROR,
+                       "%p\n",
+                       "make_command_processor"),
+                      -1);
+
+  // Set the <Command_Processor> in the <Options> to make it visible.
+  OPTIONS::instance ()->command_processor (*cp);
+
+  // Set the <URL_Visitor> in the <Options> to make it visible.
+  OPTIONS::instance ()->visitor (*visitor);
+
+  // Build the starting URL.  Allocation failures must yield -1: a 0
+  // here would be read by callers as success.
+  ACE_URL_Addr *url_addr;
+  ACE_NEW_RETURN (url_addr,
+                  ACE_URL_Addr (OPTIONS::instance()->hostname (),
+                                OPTIONS::instance()->path_name (),
+                                OPTIONS::instance()->port_no ()),
+                  -1);
+  Auto_Destroyer<ACE_URL_Addr> url_addr_ptr (url_addr);
+
+  HTTP_URL *http_url;
+  ACE_NEW_RETURN (http_url, HTTP_URL (**url_addr_ptr), -1);
+  Auto_Destroyer<HTTP_URL> http_url_ptr (http_url);
+
+  // NOTE(review): the command is not wrapped in an Auto_Destroyer;
+  // presumably the <Command_Processor> owns it after <insert> — confirm.
+  URL_Command *url_command;
+  ACE_NEW_RETURN (url_command, URL_Command (*http_url_ptr), -1);
+
+  if (cp->insert (url_command) != 0)
+    ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "insert"), -1);
+
+  if (cp->execute () != 0)
+    ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "execute"), -1);
+  return 0;
+}
+
diff --git a/ACE/examples/Web_Crawler/Web_Crawler.h b/ACE/examples/Web_Crawler/Web_Crawler.h
new file mode 100644
index 00000000000..01e275e2187
--- /dev/null
+++ b/ACE/examples/Web_Crawler/Web_Crawler.h
@@ -0,0 +1,62 @@
+/* -*- C++ -*- */
+// $Id$
+
+// ============================================================================
+//
+// = LIBRARY
+//    examples/Web_Crawler
+//
+// = FILENAME
+//    Web_Crawler.h
+//
+// = AUTHOR
+//    Douglas C. Schmidt <schmidt@cs.wustl.edu>
+//
+// ============================================================================
+
+#ifndef _WEB_CRAWLER_H
+#define _WEB_CRAWLER_H
+
+#include "URL_Addr.h"
+#include "HTTP_URL.h"
+
+#if !defined (ACE_LACKS_PRAGMA_ONCE)
+#pragma once
+#endif /* ACE_LACKS_PRAGMA_ONCE */
+
+// Forward declaration.
+class URL_Visitor_Factory;
+
+class Web_Crawler
+{
+  // = TITLE
+  //    An abstraction for a Web Crawler.
+  //
+  // = DESCRIPTION
+  //    This class is a Facade that organizes the other classes in the
+  //    solution, which include a factory that creates a visitor,
+  //    which in turn embodies the appropriate visitation strategy.
+public:
+  // = Initialization and termination methods.
+  Web_Crawler (void);
+  // Constructor.
+
+  ~Web_Crawler (void);
+  // Destructor; deletes the <URL_Visitor_Factory>.
+
+  int open (int argc, ACE_TCHAR *argv[]);
+  // Parses the command-line options and creates the <URL_Visitor_Factory>.
+
+  int run (void);
+  // Run the Web Crawler and carries out whatever visitation strategy
+  // is configured.  Returns -1 on failure and 0 on success.
+private:
+  Web_Crawler (const Web_Crawler &);
+  Web_Crawler &operator= (const Web_Crawler &);
+  // Declared but not defined: a copy would delete the factory twice.
+  URL_Visitor_Factory *url_visitor_factory_;
+  // Pointer to a factory that creates visitors that explore URLs and
+  // perform various tasks; subclasses determine what happens on a visit.
+};
+
+#endif /* _WEB_CRAWLER_H */
diff --git a/ACE/examples/Web_Crawler/Web_Crawler.mpc b/ACE/examples/Web_Crawler/Web_Crawler.mpc
new file mode 100644
index 00000000000..7750d7cbd5d
--- /dev/null
+++ b/ACE/examples/Web_Crawler/Web_Crawler.mpc
@@ -0,0 +1,7 @@
+// -*- MPC -*-
+// $Id$
+
+project : aceexe {
+  avoids += ace_for_tao
+  exename = main
+}
diff --git a/ACE/examples/Web_Crawler/main.cpp b/ACE/examples/Web_Crawler/main.cpp
new file mode 100644
index 00000000000..1735f811b78
--- /dev/null
+++ b/ACE/examples/Web_Crawler/main.cpp
@@ -0,0 +1,51 @@
+// $Id$
+
+// ============================================================================
+//
+// = LIBRARY
+//    examples/Web_Crawler
+//
+// = FILENAME
+//    main.cpp
+//
+// = DESCRIPTION
+//     This program implements a Web crawler that can be configured to
+//     apply various strategies to URLs that it visits.
+//
+// = AUTHOR
+//    Doug Schmidt <schmidt@cs.wustl.edu>
+//
+// ============================================================================
+
+#include "ace/OS_main.h"
+#include "ace/Signal.h"
+#include "Web_Crawler.h"
+#include "Options.h"
+
+ACE_RCSID(Web_Crawler, main, "$Id$")
+
+void sig_handler (int)
+{
+  ACE_DEBUG ((LM_DEBUG,
+              ACE_TEXT ("aborting!\n")));
+  ACE_OS::abort ();  // NOTE(review): the logging above may not be async-signal-safe — confirm.
+}
+
+int
+ACE_TMAIN (int argc, ACE_TCHAR *argv[])
+{
+  // Register <sig_handler> for SIGFPE (skipped on WinCE builds).
+#if !defined (ACE_HAS_WINCE)
+  ACE_Sig_Action sa ((ACE_SignalHandler) sig_handler, SIGFPE);
+#endif
+  Web_Crawler crawler;
+
+  // The exit status is 1 if either <open> or <run> reports -1;
+  // <run> is only attempted once <open> has succeeded.
+  const bool failed = crawler.open (argc, argv) == -1
+                      || crawler.run () == -1;
+  return failed ? 1 : 0;
+}
+
+
+
author	William R. Otte <wotte@dre.vanderbilt.edu>	2006-07-24 15:50:30 +0000
committer	William R. Otte <wotte@dre.vanderbilt.edu>	2006-07-24 15:50:30 +0000
commit	c44379cc7d9c7aa113989237ab0f56db12aa5219 (patch)
tree	66a84b20d47f2269d8bdc6e0323f338763424d3a /ACE/examples/Web_Crawler
parent	3aff90f4a822fcf5d902bbfbcc9fa931d6191a8c (diff)
download	ATCD-c44379cc7d9c7aa113989237ab0f56db12aa5219.tar.gz