summaryrefslogtreecommitdiff
path: root/examples/Web_Crawler/URL_Visitor.h
diff options
context:
space:
mode:
Diffstat (limited to 'examples/Web_Crawler/URL_Visitor.h')
-rw-r--r--examples/Web_Crawler/URL_Visitor.h437
1 files changed, 0 insertions, 437 deletions
diff --git a/examples/Web_Crawler/URL_Visitor.h b/examples/Web_Crawler/URL_Visitor.h
deleted file mode 100644
index 4ab405a0319..00000000000
--- a/examples/Web_Crawler/URL_Visitor.h
+++ /dev/null
@@ -1,437 +0,0 @@
-/* -*- C++ -*- */
-// $Id$
-
-// ============================================================================
-//
-// = LIBRARY
-// examples/Web_Crawler
-//
-// = FILENAME
-// URL_Visitor.h
-//
-// = AUTHOR
-// Douglas C.Schmidt <schmidt@cs.wustl.edu>
-// Kirthika Parameswaran <kirthika@cs.wustl.edu>
-// ============================================================================
-
-#ifndef _URL_VISITOR_H
-#define _URL_VISITOR_H
-
-#if !defined (ACE_LACKS_PRAGMA_ONCE)
-#define ACE_LACKS_PRAGMA_ONCE
-#endif /* ACE_LACKS_PRAGMA_ONCE */
-
-#include "ace/Strategies_T.h"
-#include "ace/Synch.h"
-#include "HTTP_URL.h"
-#include "Iterators.h"
-#include "ace/Hash_Map_Manager_T.h"
-#include "ace/Caching_Strategies_T.h"
-#include "ace/Cached_Connect_Strategy_T.h"
-#include "Options.h"
-#include "ace/Pair_T.h"
-
-// Forward declarations.
-class URL_Validation_Visitor;
-
-class URL_Processing_Strategy
-{
- // = TITLE
- // Abstract base class for the URL processing strategy.
- //
- // = DESCRIPTION
-public:
- URL_Processing_Strategy (URL &,
- URL_Iterator &);
- // Constructor.
-
- virtual int execute (void) = 0;
- // Perform the strategy.
-
- virtual int destroy (void);
-
- // Close down the resources.
-
-protected:
- URL &url_;
- // A reference to the URL "context" that we're processing.
-
- URL_Iterator &iterator_;
- // Iterator for the URL that we're processing.
-};
-
-class HTTP_Header_Processing_Strategy : public URL_Processing_Strategy
-{
- // = TITLE
- // Defines the HTTP header processing strategy.
- //
- // = DESCRIPTION
-public:
- HTTP_Header_Processing_Strategy (URL &,
- URL_Iterator &);
- // Constructor.
-
- virtual int execute (void);
- // Perform the strategy for processing an HTTP header.
-};
-
-class HTML_Body_Validation_Strategy : public URL_Processing_Strategy
-{
- // = TITLE
- // Defines the HTML body processing strategy.
- //
- // = DESCRIPTION
- // This class iterates through the body of an HTML file and
- // recursively visits embedded links.
-public:
- HTML_Body_Validation_Strategy (URL &,
- URL_Iterator &,
- URL_Validation_Visitor &);
- // Constructor.
-
- virtual int execute (void);
- // Perform the strategy for processing an HTML file. This strategy
- // iterates over the HTML file and recursively visits embedded links
- // to process them, as well.
-
-private:
- URL_Validation_Visitor &visitor_context_;
- // This is the context of the visit.
-};
-
-class URL_Download_Strategy : public URL_Processing_Strategy
-{
- // = TITLE
- // Defines a URL downloading strategy.
- //
- // = DESCRIPTION
- // This class downloads a URL's contents into a temporary file.
-public:
- URL_Download_Strategy (URL &,
- URL_Iterator &);
- // Constructor.
-
- virtual int execute (void);
- // Perform the strategy for downloading a URL to a temporary file.
-};
-
-class URL_Visitation_Strategy_Factory
-{
- // = TITLE
- // Abstract Factory for the URL visitation strategy.
- //
- // = DESCRIPTION
-public:
- URL_Visitation_Strategy_Factory (URL *);
-
- // = Factory Methods.
- virtual URL_Iterator *make_header_iterator (void) = 0;
- // Factory Method that makes the header iterator.
-
- virtual URL_Iterator *make_body_iterator (void) = 0;
- // Factory Method that makes the body iterator.
-
- virtual URL_Processing_Strategy *make_header_strategy (URL_Iterator &) = 0;
- // Factory Method that makes the header processing strategy.
-
- virtual URL_Processing_Strategy *make_body_strategy (URL_Iterator &) = 0;
- // Factory Method that makes the body processing strategy .
-
- virtual int destroy (void) = 0;
- // Close down the resources.
-
-protected:
- URL *url_;
- // Stash the URL so we don't have to pass it around.
-};
-
-class URL_Download_Visitation_Strategy_Factory : public URL_Visitation_Strategy_Factory
-{
- // = TITLE
- // Concrete Factory for the URL validation visitation strategy.
- //
- // = DESCRIPTION
-public:
- URL_Download_Visitation_Strategy_Factory (URL *);
- // Constructor.
-
- // = Factory Methods.
- virtual URL_Iterator *make_header_iterator (void);
- // Factory Method that makes an <HTTP_Header_Iterator>.
-
- virtual URL_Iterator *make_body_iterator (void);
- // Factory Method that makes an <HTML_Body_Iterator>.
-
- virtual URL_Processing_Strategy *make_header_strategy (URL_Iterator &);
- // Factory Method that makes the header processing strategy.
-
- virtual URL_Processing_Strategy *make_body_strategy (URL_Iterator &);
- // Factory Method that makes the body processing strategy .
-
- virtual int destroy (void);
- // Close down the resources.
-};
-
-class URL_Validation_Visitation_Strategy_Factory : public URL_Visitation_Strategy_Factory
-{
- // = TITLE
- // Concrete Factory for the URL validation visitation strategy.
- //
- // = DESCRIPTION
-public:
- URL_Validation_Visitation_Strategy_Factory (URL *,
- URL_Validation_Visitor &);
- // Constructor.
-
- // = Factory Methods.
- virtual URL_Iterator *make_header_iterator (void);
- // Factory Method that makes an <HTTP_Header_Iterator>.
-
- virtual URL_Iterator *make_body_iterator (void);
- // Factory Method that makes an <HTML_Body_Iterator>.
-
- virtual URL_Processing_Strategy *make_header_strategy (URL_Iterator &);
- // Factory Method that makes the header processing strategy.
-
- virtual URL_Processing_Strategy *make_body_strategy (URL_Iterator &);
- // Factory Method that makes the body processing strategy .
-
- virtual int destroy (void);
- // Close down the resources.
-
-private:
- URL_Validation_Visitor &visitor_context_;
- // Context of the visitor.
-};
-
-class URL_Visitor
-{
- // = TITLE
- // Base class for the URL Visitor.
- //
- // = DESCRIPTION
- // This class plays the "visitor" role in the Visitor pattern.
-public:
- virtual int visit (HTTP_URL &http_url) = 0;
- // Visit an <HTTP_URL>.
-
- // @@
- // virtual int visit (FTP_URL &http_url) = 0;
-
- virtual int destroy (void) = 0;
- // Cleanup the resources.
-
-protected:
- virtual URL_Visitation_Strategy_Factory *make_visitation_strategy_factory (URL &) = 0;
- // Make the appropriate <URL_Visitation_Strategy_Factory>.
-};
-
-typedef int ATTRIBUTES;
-typedef ACE_Svc_Handler <ACE_SOCK_STREAM, ACE_NULL_SYNCH>
- Client_Svc_Handler;
-typedef ACE_Pair<Client_Svc_Handler *, ATTRIBUTES>
- CACHED_HANDLER;
-typedef ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>
- ACE_ADDR;
-typedef ACE_Hash<ACE_ADDR> H_KEY;
-typedef ACE_Equal_To<ACE_ADDR> C_KEYS;
-
-typedef ACE_Hash_Map_Manager_Ex<ACE_ADDR, CACHED_HANDLER, H_KEY, C_KEYS, ACE_Null_Mutex>
- HASH_MAP;
-typedef ACE_Hash_Map_Iterator_Ex<ACE_ADDR, CACHED_HANDLER, H_KEY, C_KEYS, ACE_Null_Mutex>
- HASH_MAP_ITERATOR;
-typedef ACE_Hash_Map_Reverse_Iterator_Ex<ACE_ADDR, CACHED_HANDLER, H_KEY, C_KEYS, ACE_Null_Mutex>
- HASH_MAP_REVERSE_ITERATOR;
-
-typedef ACE_Recyclable_Handler_Cleanup_Strategy<ACE_ADDR, CACHED_HANDLER, HASH_MAP>
- CLEANUP_STRATEGY;
-typedef ACE_Recyclable_Handler_Caching_Utility<ACE_ADDR, CACHED_HANDLER, HASH_MAP, HASH_MAP_ITERATOR, ATTRIBUTES>
- CACHING_UTILITY;
-
-typedef ACE_LRU_Caching_Strategy<ATTRIBUTES, CACHING_UTILITY>
- LRU_CACHING_STRATEGY;
-
-//#if defined (ACE_HAS_BROKEN_EXTENDED_TEMPLATES)
-
-typedef LRU_CACHING_STRATEGY
- CACHING_STRATEGY;
-
-//#else
-
-//typedef ACE_Caching_Strategy_Adapter<ATTRIBUTES, CACHING_UTILITY, LRU_CACHING_STRATEGY>
-// LRU_CACHING_STRATEGY_ADAPTER;
-//typedef ACE_Caching_Strategy<ATTRIBUTES, CACHING_UTILITY>
-// CACHING_STRATEGY;
-
-//#endif /* ACE_HAS_BROKEN_EXTENDED_TEMPLATES */
-
-
-typedef ACE_Strategy_Connector<Client_Svc_Handler, ACE_SOCK_CONNECTOR>
- STRATEGY_CONNECTOR;
-
-typedef ACE_NOOP_Creation_Strategy<Client_Svc_Handler>
- NULL_CREATION_STRATEGY;
-
-typedef ACE_NOOP_Concurrency_Strategy<Client_Svc_Handler>
- NULL_ACTIVATION_STRATEGY;
-
-typedef ACE_Cached_Connect_Strategy_Ex<Client_Svc_Handler, ACE_SOCK_CONNECTOR, CACHING_STRATEGY, ATTRIBUTES, ACE_SYNCH_NULL_MUTEX>
- CACHED_CONNECT_STRATEGY;
-
-class URL_Validation_Visitor : public URL_Visitor
-{
- // = TITLE
- // Subclass that defines the URL validation visitor.
- //
- // = DESCRIPTION
- // This class checks to make sure that the <HTTP_URL> is valid.
- // If the <HTTP_URL> is an <HTML> file, it can also be used to
- // recursively check that all embedded links in this file are
- // valid.
-public:
- typedef ACE_Hash_Map_Manager <ACE_URL_Addr, URL_Status, ACE_Null_Mutex>
- URL_CACHE;
-
- virtual int visit (HTTP_URL &http_url);
- // Visit an <HTTP_URL> to make sure that it's valid. If the content
- // type of the <HTTP_URL> is "text/html" and the <recursion> option
- // is enabled then <visit> recursively checks each link embedded in
- // the HTML page.
-
- // @@
- // virtual int visit (FTP_URL &http_url);
-
- URL_Validation_Visitor (void);
- virtual int destroy (void);
- // Cleanup the resources.
-
- URL_CACHE &url_cache (void);
- // Returns a reference to the URL cache.
-
-
- /*
-
-
- typedef ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH>
- Svc_Handler;
- typedef ACE_Strategy_Connector<Svc_Handler, ACE_SOCK_CONNECTOR>
- STRAT_CONNECTOR;
- typedef ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>
- REFCOUNTED_HASH_RECYCLABLE_ADDRESS;
- typedef ACE_NOOP_Creation_Strategy<Svc_Handler>
- NULL_CREATION_STRATEGY;
- typedef ACE_NOOP_Concurrency_Strategy<Svc_Handler>
- NULL_ACTIVATION_STRATEGY;
-
- typedef ACE_Hash_Map_Manager_Ex<REFCOUNTED_HASH_RECYCLABLE_ADDRESS,\
- ACE_Pair<Svc_Handler *, int>,\
- ACE_Hash<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>, \
- ACE_Equal_To<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>,\
- ACE_Null_Mutex>
- CONNECTION_HASH_MAP;
- typedef ACE_Hash_Map_Iterator_Ex<REFCOUNTED_HASH_RECYCLABLE_ADDRESS,\
- ACE_Pair<Svc_Handler *, int>,\
- ACE_Hash<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>, \
- ACE_Equal_To<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>,\
- ACE_Null_Mutex>
- CONNECTION_HASH_MAP_ITERATOR;
- typedef ACE_Hash_Map_Reverse_Iterator_Ex<REFCOUNTED_HASH_RECYCLABLE_ADDRESS,\
- ACE_Pair<Svc_Handler *, int>,\
- ACE_Hash<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>, \
- ACE_Equal_To<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>,\
- ACE_Null_Mutex>
- CONNECTION_HASH_MAP_REVERSE_ITERATOR;
- typedef ACE_Pair_Caching_Utility <REFCOUNTED_HASH_RECYCLABLE_ADDRESS, \
- ACE_Pair<Svc_Handler *, int>, \
- CONNECTION_HASH_MAP, CONNECTION_HASH_MAP_ITERATOR, int >
- CACHING_STRATEGY_UTILITY;
- typedef ACE_LRU_Caching_Strategy<REFCOUNTED_HASH_RECYCLABLE_ADDRESS,\
- ACE_Pair<Svc_Handler *, int>,\
- CONNECTION_HASH_MAP, int,\
- CACHING_STRATEGY_UTILITY >
- LRU;
- typedef ACE_Cached_Connect_Strategy_Ex<Svc_Handler,ACE_SOCK_CONNECTOR, LRU,int, ACE_SYNCH_NULL_MUTEX>
- CACHED_CONNECT_STRATEGY;
- */
-protected:
- virtual ~URL_Validation_Visitor (void);
- virtual URL_Visitation_Strategy_Factory *make_visitation_strategy_factory (URL &);
- // Factory Method that makes a
- // <URL_Validation_Visitation_Strategy_Factory>.
-
- URL_CACHE url_cache_;
- // Cache the status of URLs we've already validated.
-
- int in_cache (const ACE_URL_Addr &url_addr);
- // Check to see if the reply status of this <url_addr> is in the
- // cache. Returns 1 if so, 0 if not.
-
- NULL_CREATION_STRATEGY creation_strategy_;
- NULL_ACTIVATION_STRATEGY activation_strategy_;
-
- // Configure the Strategy Connector with a strategy that caches
- // connection.
- CACHED_CONNECT_STRATEGY *caching_connect_strategy_;
-
- STRATEGY_CONNECTOR *strat_connector_;
-
- CACHING_STRATEGY caching_strategy_;
-};
-
-
-class URL_Download_Visitor : public URL_Visitor
-{
- // = TITLE
- // Subclass for the URL validtion visitor.
- //
- // = DESCRIPTION
- // This class checks to make sure that the <HTTP_URL> is valid.
-public:
- virtual int visit (HTTP_URL &http_url);
- // Visit an <HTTP_URL> to make sure that it's valid. If the content
- // type of the <HTTP_URL> is "text/html" and the <recursion> option
- // is enabled then <visit> recursively checks each link embedded in
- // the HTML page.
-
- // @@
- // virtual int visit (FTP_URL &http_url);
-
- virtual int destroy (void);
- // Cleanup the resources.
-
-protected:
- URL_Visitation_Strategy_Factory *make_visitation_strategy_factory (URL &);
- // Factory Method that makes a <URL_Download_Visitation_Strategy_Factory>.
-};
-
-template <class T>
-class Auto_Destroyer
-{
- // = TITLE
- // Simple class that ensures the <destroy> method is called on our
- // <URL_*> objects when they go out of scope.
- //
- // = DESCRIPTION
- // This class is similar to an auto_ptr<> and should be used to
- // simplify blocks of code that must create/destroy pointers to
- // various <URL_*> related strategies and iterators.
-public:
- Auto_Destroyer (T *t): t_ (t) {}
- T *operator-> (void) { return this->t_; }
- T *operator *(void) { return this->t_; }
- void operator= (T *t)
- {
- if (this->t_ != 0)
- this->t_->destroy ();
- this->t_ = t;
- }
- ~Auto_Destroyer (void)
- {
- if (this->t_ != 0)
- t_->destroy ();
- }
-private:
- T *t_;
-};
-
-#endif /* _URL_VISITOR_H */