diff options
author | kirthika <kirthika@ae88bc3d-4319-0410-8dbf-d08b4c9d3795> | 1999-05-03 20:18:07 +0000 |
---|---|---|
committer | kirthika <kirthika@ae88bc3d-4319-0410-8dbf-d08b4c9d3795> | 1999-05-03 20:18:07 +0000 |
commit | 548acc548ea4e483831669f45b2833299a73196b (patch) | |
tree | 57815562fd64d0c2fda9ca9a15456e5b01953f06 /examples | |
parent | 5ba4e943863b8a056d3cd8ddc04f773339ad43ce (diff) | |
download | ATCD-548acc548ea4e483831669f45b2833299a73196b.tar.gz |
part of HTTP_1.1_Client
Diffstat (limited to 'examples')
21 files changed, 3551 insertions, 0 deletions
diff --git a/examples/Web_Crawler/Iterators.cpp b/examples/Web_Crawler/Iterators.cpp new file mode 100644 index 00000000000..15bb8a3450a --- /dev/null +++ b/examples/Web_Crawler/Iterators.cpp @@ -0,0 +1,162 @@ +// $Id$ + +#include "Options.h" +#include "Iterators.h" + +ACE_RCSID(HTTP_1.1_Client, Iterators, "$Id$") + +URL_Iterator::~URL_Iterator (void) +{ +} + +int +URL_Iterator::destroy (void) +{ + // Commit suicide. + delete this; + return 0; +} + +HTML_Body_Iterator::HTML_Body_Iterator (URL &url) + : url_ (url) +{ +} + +int +HTML_Body_Iterator::next (ACE_CString &url) +{ + size_t len = BUFSIZ; + const char *buf; + ACE_CString buffer; + int href_index = 0; + + for (buf = this->url_.stream ().recv (len); + buf > 0; + buf = this->url_.stream ().recv (len)) + { + + buffer.set (buf, BUFSIZ, 1); + + href_index = buffer.find ("HREF"); + + if (href_index < 0) + href_index = buffer.find ("href"); + + // Grep fpr " and grab the string until end-" + if ( href_index > 0) + { + // Get back to buffer start location. + this->url_.stream ().seek (-1 * len, SEEK_CUR); + + int start_index = buffer.find ('\"', + href_index); + if (start_index <= 0) + break; + + start_index += href_index; + + int end_index = buffer.find ('\"', + start_index + 1); + if (end_index <= 0) + break; + + end_index += start_index + 1; + + ssize_t url_len = end_index - (start_index + 1); + + ACE_CString temp = buffer.substring (start_index + 1, + url_len); + url.set (temp.c_str (), len, 1); + + this->url_.stream ().seek (end_index + 1); + + return url_len; + } + } + return 0; + +} + +HTTP_Header_Iterator::HTTP_Header_Iterator (URL &url) + : url_ (url), + end_of_header_ (0) +{ +} + +int +HTTP_Header_Iterator::next (ACE_CString &line) +{ + if (this->end_of_header_) + return 0; + else + { + for (char c; + (c = this->url_.stream ().get_char ()) != EOF; + ) + { + // Check to see if we're at the end of the header line. 
+ if (c == '\r' && this->url_.stream ().peek_char (0) == '\n') + { + line.set (this->url_.stream ().recv (), + this->url_.stream ().recv_len () - 1, + 1); + + // Check to see if we're at the end of the header. + if (this->url_.stream ().peek_char (1) == '\r' + && this->url_.stream ().peek_char (2) == '\n') + { + this->end_of_header_ = 1; + // We're at the end of the header section. + this->url_.stream ().seek (3); + } + else + // We're at the end of the line. + this->url_.stream ().seek (1); + + return 1; + } + // Handle broken Web servers that use '\n' instead of + // '\r\n'. + else if (c == '\n') + { + line.set (this->url_.stream ().recv (), + (this->url_.stream ().recv_len ()), + 1); + + // Check to see if we're at the end of the header. + if (this->url_.stream ().peek_char (0) == '\n') + { + // We're at the end of the header section. + this->url_.stream ().seek (1); + this->end_of_header_ = 1; + } + + return 1; + } + } + + } + return 0; +} + +URL_Download_Iterator::URL_Download_Iterator (URL &url) + : url_ (url) +{ +} + +int +URL_Download_Iterator::next (ACE_CString &buffer) +{ + size_t len = BUFSIZ; + + const char *buf = this->url_.stream ().recv (len); + + + if (buf == 0) + return 0; + else + { + buffer.set (buf, len, 1); + return 1; + } +} diff --git a/examples/Web_Crawler/Iterators.h b/examples/Web_Crawler/Iterators.h new file mode 100644 index 00000000000..b171b0c2aeb --- /dev/null +++ b/examples/Web_Crawler/Iterators.h @@ -0,0 +1,117 @@ +/* -*- C++ -*- */ +// $Id$ + +// ============================================================================ +// +// = LIBRARY +// apps/Web +// +// = FILENAME +// Iterators.h +// +// = AUTHOR +// Douglas C. 
Schmidt <schmidt@cs.wustl.edu> +// +// ============================================================================ + +#ifndef _ITERATORS_H +#define _ITERATORS_H + +#include "URL.h" + +#if !defined (ACE_LACKS_PRAGMA_ONCE) +#define ACE_LACKS_PRAGMA_ONCE +#endif /* ACE_LACKS_PRAGMA_ONCE */ + +class URL_Iterator +{ + // = TITLE + // An abstract base class that defines an iterator. + // + // = DESCRIPTION + // Subclasses of this base class can define what strings + // to return from <next>. This class decouples higher-level + // software from the details of whatever type of URL header or + // body we're iterating over. +public: + // = Initialization and termination methods. + virtual int destroy (void); + // "virtual" destructor. + + // = Iterator methods. + virtual int next (ACE_CString &string) = 0; + // Pass back the next <string> that hasn't been seen yet. Returns 0 + // when all items have been seen, else 1. + +protected: + virtual ~URL_Iterator (void); + // C++ destructor. +}; + +class HTML_Body_Iterator : public URL_Iterator +{ + // = TITLE + // An iterator that returns URLs embedded in HTML files. +public: + // = Initialization and termination methods. + HTML_Body_Iterator (URL &url); + // Constructor. + + // = Iterator methods. + virtual int next (ACE_CString &url); + // Pass back the next <url> that hasn't been seen in the + // memory-mapped file. Returns 0 when all items have been seen, + // else 1. + +private: + URL &url_; + // HTTP URL that we're iterating over. +}; + +class HTTP_Header_Iterator : public URL_Iterator +{ + // = TITLE + // An iterator that iterates over the HTTP header. +public: + // = Initialization and termination methods. + HTTP_Header_Iterator (URL &url); + // Constructor. + + // = Iterator methods. + virtual int next (ACE_CString &line); + // Pass back the next <line> that hasn't been seen in the + // memory-mapped file header. Returns 0 when we've reached the end + // of the header. seen, else 1. 
+ +private: + URL &url_; + // HTTP URL that we're iterating over. + + int end_of_header_; + // We've found the end of the header, which means this iterator is + // finished. +}; + +class URL_Download_Iterator : public URL_Iterator +{ + // = TITLE + // An iterator that iterates over the contents of an entire URL, + // i.e., both header and body, and returns it in <BUFSIZ> + // <buffer>s. +public: + // = Initialization and termination methods. + URL_Download_Iterator (URL &url); + // Constructor. + + // = Iterator methods. + virtual int next (ACE_CString &buffer); + // Pass back the next <buffer> data from the stream, where + // <buffer.size> <= <BUFSIZ> . Returns 0 when we've reached the end + // of the header, else 1. + +private: + URL &url_; + // HTTP URL that we're iterating over. +}; + +#endif /* _ITERATORS_H */ diff --git a/examples/Web_Crawler/Optimal_Cache_Map_Manager_T.cpp b/examples/Web_Crawler/Optimal_Cache_Map_Manager_T.cpp new file mode 100644 index 00000000000..3363325fec3 --- /dev/null +++ b/examples/Web_Crawler/Optimal_Cache_Map_Manager_T.cpp @@ -0,0 +1,420 @@ +// $Id$ + +#ifndef ACE_OPTIMAL_CACHE_MAP_MANAGER_T_C +#define ACE_OPTIMAL_CACHE_MAP_MANAGER_T_C + +#define ACE_BUILD_DLL + +#include "Optimal_Cache_Map_Manager_T.h" + +#if !defined (ACE_LACKS_PRAGMA_ONCE) +#define ACE_LACKS_PRAGMA_ONCE +#endif /* ACE_LACKS_PRAGMA_ONCE */ + +#include "ace/Malloc.h" +#include "ace/Service_Config.h" + +#if !defined (__ACE_INLINE__) +#include "Optimal_Cache_Map_Manager_T.i" +#endif /* __ACE_INLINE__ */ + +ACE_RCSID(HTTP_1.1_Client, Optimal_Cache_Map_Manager_T, "$Id$") + +ACE_ALLOC_HOOK_DEFINE(ACE_Pair) + +ACE_ALLOC_HOOK_DEFINE(ACE_Reference_Pair) + +ACE_ALLOC_HOOK_DEFINE(ACE_Optimal_Cache_Map_Manager) + +ACE_ALLOC_HOOK_DEFINE(ACE_Optimal_Cache_Map_Iterator) + +ACE_ALLOC_HOOK_DEFINE(ACE_Optimal_Cache_Map_Reverse_Iterator) + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> int +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, 
HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::bind (const KEY &key, + const VALUE &value) +{ + // Insert a entry which has the <key> and the <cache_value> which is + // the combination of the <value> and the attributes of the caching strategy. + ACE_Pair<VALUE, ATTRIBUTES> cache_value (value, + this->caching_strategy_.attributes ()); + + int bind_result =this->map_.bind (key, + cache_value); + if (bind_result != -1) + { + int result = this->caching_strategy_.notify_bind (bind_result, + cache_value.second ()); + if (result == -1) + { + this->map_.unbind (key, + cache_value); + // Unless the notification goes thru the bind operation is not complete. + bind_result = -1; + } + } + return bind_result; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> int +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>:: bind (const KEY &key, + const VALUE &value, + ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::CACHE_ENTRY *&entry) +{ + // Insert a entry which has the <key> and the <cache_value> which is + // the combination of the <value> and the attributes of the caching strategy. + ACE_Pair<VALUE, ATTRIBUTES> cache_value (value, + this->caching_strategy_.attributes ()); + + int bind_result =this->map_.bind (key, + cache_value, + entry); + if (bind_result != -1) + { + int result = this->caching_strategy_.notify_bind (bind_result, + cache_value.second ()); + + if (result == -1) + { + this->map_.unbind (key, + cache_value); + // Unless the notification goes thru the bind operation is not complete. 
+ bind_result = -1; + } + } + return bind_result; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> int +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::rebind (const KEY &key, + const VALUE &value) +{ + ACE_Pair<VALUE, ATTRIBUTES> cache_value (value, + this->caching_strategy_.attributes ()); + int rebind_result = this->map_.rebind (key, + cache_value); + if (rebind_result != -1) + { + int result = this->caching_strategy_.notify_rebind (rebind_result, + cache_value.second ()); + if (result == -1) + { + this->map_.unbind (key, + cache_value); + // Unless the notification goes thru the rebind operation is not complete. + rebind_result = -1; + } + } + return rebind_result; +} + + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> int +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::rebind (const KEY &key, + const VALUE &value, + VALUE &old_value) +{ + ACE_Pair<VALUE, ATTRIBUTES> cache_value (value, + this->caching_strategy_.attributes ()); + ACE_Pair<VALUE, ATTRIBUTES> old_cache_value (old_value, + this->caching_strategy_.attributes ()); + int rebind_result = this->map_.rebind (key, + cache_value, + old_cache_value); + if (rebind_result != -1) + { + int result = this->caching_strategy_.notify_rebind (rebind_result, + cache_value.second ()); + + if (result == -1) + { + this->map_.unbind (key, + cache_value); + // Unless the notification goes thru the rebind operation is not complete. 
+ rebind_result = -1; + } + else + old_value = old_cache_value.first (); + } + return rebind_result; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> int +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::rebind (const KEY &key, + const VALUE &value, + KEY &old_key, + VALUE &old_value) +{ + ACE_Pair<VALUE, ATTRIBUTES> cache_value (value, + this->caching_strategy_.attributes ()); + ACE_Pair<VALUE, ATTRIBUTES> old_cache_value (old_value, + this->caching_strategy_.attributes ()); + int rebind_result = this->map_.rebind (key, + cache_value, + old_key, + old_cache_value); + if (rebind_result != -1) + { + int result = this->caching_strategy_.notify_rebind (rebind_result, + cache_value.second ()); + + if (result == -1) + { + this->map_.unbind (key, + cache_value); + // Unless the notification goes thru the rebind operation is not complete. + rebind_result = -1; + } + else + old_value = old_cache_value.first (); + } + return rebind_result; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> int +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::rebind (const KEY &key, + const VALUE &value, + ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::CACHE_ENTRY *&entry) +{ + ACE_Pair<VALUE, ATTRIBUTES> cache_value (value, + this->caching_strategy_.attributes ()); + int rebind_result = this->map_.rebind (key, + cache_value, + entry); + if (rebind_result != -1) + { + int result = this->caching_strategy_.notify_rebind (rebind_result, + cache_value.second ()); + if (result == -1) + { + this->map_.unbind (key, + cache_value); + // Unless the notification goes thru the rebind operation is not complete. 
+ rebind_result = -1; + } + } + return rebind_result; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> int +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::trybind (const KEY &key, + VALUE &value) +{ + ACE_Pair<VALUE, ATTRIBUTES> cache_value (value, + this->caching_strategy_.attributes ()); + int trybind_result = this->map_.trybind (key, + cache_value); + if (trybind_result != -1) + { + int result = this->caching_strategy_.notify_trybind (trybind_result, + cache_value.second ()); + if (result == -1) + { + // If the entry has got inserted into the map, it is removed + // due to failure. + if (trybind_result == 0) + this->map_.unbind (key, + cache_value); + trybind_result = -1; + } + else + { + // If an attempt is made to bind an existing entry the value + // is overwritten with the value from the map. + if (trybind_result == 1) + value = cache_value.first (); + } + + } + return trybind_result; +} +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> int +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::trybind (const KEY &key, + VALUE &value, + ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::CACHE_ENTRY *&entry) +{ + ACE_Pair<VALUE, ATTRIBUTES> cache_value (value, + this->caching_strategy_.attributes ()); + int trybind_result = this->map_.trybind (key, + cache_value, + entry); + if (trybind_result != -1) + { + int result = this->caching_strategy_.notify_trybind (trybind_result, + cache_value.second ()); + if (result == -1) + { + // If the entry has got inserted into the map, it is removed + // due to failure. + if (trybind_result == 0) + this->map_.unbind (key, + cache_value); + trybind_result = -1; + } + else + { + // If an attempt is made to bind an existing entry the value + // is overwritten with the value from the map. 
+ if (trybind_result == 1) + value = cache_value.first (); + } + + } + return trybind_result; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> int +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::find (const KEY& key, + VALUE& value) +{ + // Lookup the key and populate the <value>. + ACE_Pair<VALUE, ATTRIBUTES> cache_value; + + int find_result = this->map_.find (key, + cache_value); + if (find_result != -1) + { + int result = this->caching_strategy_.notify_find (find_result, + cache_value.second ()); + // Unless the find and notification operations go thru, this + // method is not successful. + if (result == -1) + find_result = -1; + else + { + // Since the <cache_value> has now changed after the + // notification, we need to bind to the map again. + int rebind_result = this->map_.rebind (key, + cache_value); + if (rebind_result == -1) + find_result = -1; + else + value = cache_value.first (); + } + } + return find_result; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> int +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::find (const KEY& key) +{ + // Lookup the key and populate the <value>. + ACE_Pair<VALUE, ATTRIBUTES> cache_value; + + int find_result = this->map_.find (key, + cache_value); + if (find_result != -1) + { + int result = this->caching_strategy_.notify_find (find_result, + cache_value.second ()); + // Unless the find and notification operations go thru, this + // method is not successful. + if (result == -1) + find_result = -1; + else + { + // Since the <cache_value> has now changed after the + // notification, we need to bind to the map again. 
+ int rebind_result = this->map_.rebind (key, + cache_value); + if (rebind_result == -1) + find_result = -1; + } + } + return find_result; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> int +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::find (const KEY& key, + ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::CACHE_ENTRY *&entry) +{ + // Lookup the key and populate the <value>. + int find_result = this->map_.find (key, + entry); + if (find_result != -1) + { + int result = this->caching_strategy_.notify_find (find_result, + entry->int_id_.second ()); + // Unless the find and notification operations go thru, this + // method is not successful. + if (result == -1) + find_result = -1; + else + { + // Since the <cache_value> has now changed after the + // notification, we need to bind to the map again. + int rebind_result = this->map_.rebind (key, + entry->int_id_); + if (rebind_result == -1) + find_result = -1; + } + } + return find_result; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> int +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::unbind (const KEY& key) +{ + // Remove the entry from the cache. + ACE_Pair<VALUE, ATTRIBUTES> cache_value; + int unbind_result = this->map_.unbind (key, + cache_value); + + if (unbind_result != -1) + { + int result = this->caching_strategy_.notify_unbind (unbind_result, + cache_value.second ()); + + if (result == -1) + unbind_result = -1; + } + return unbind_result; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> int +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::unbind (const KEY& key, + VALUE& value) +{ + // Remove the entry from the cache. 
+ ACE_Pair<VALUE, ATTRIBUTES> cache_value; + + int unbind_result = this->map_.unbind (key, + cache_value); + + if (unbind_result != -1) + { + int result = this->caching_strategy_.notify_unbind (unbind_result, + cache_value.second ()); + if (result == -1) + unbind_result = -1; + else + value = cache_value.first (); + } + return unbind_result; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> int +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::unbind (ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::CACHE_ENTRY *entry) +{ + // Remove the entry from the cache. + int unbind_result = this->map_.unbind (entry); + + + if (unbind_result != -1) + { + int result = this->caching_strategy_.notify_unbind (unbind_result, + entry->int_id_.second ()); + if (result == -1) + unbind_result = -1; + } + return unbind_result; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> void +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::dump (void) const +{ + this->map_.dump (); + this->caching_strategy_.dump (); +} + +#endif /* ACE_OPTIMAL_CACHE_MAP_MANAGER_T_C */ diff --git a/examples/Web_Crawler/Optimal_Cache_Map_Manager_T.h b/examples/Web_Crawler/Optimal_Cache_Map_Manager_T.h new file mode 100644 index 00000000000..1390f3fe255 --- /dev/null +++ b/examples/Web_Crawler/Optimal_Cache_Map_Manager_T.h @@ -0,0 +1,377 @@ +/* -*- C++ -*- */ +// $Id$ + +// ============================================================================ +// +// = LIBRARY +// ace +// +// = FILENAME +// Optimal_Cache_Map_Manager.h +// +// = AUTHOR +// Kirthika Parameswaran <kirthika@cs.wustl.edu> +// +// ============================================================================ +#ifndef OPTIMAL_CACHE_MAP_MANAGER_T_H +#define OPTIMAL_CACHE_MAP_MANAGER_T_H + +#include "ace/OS.h" + +#if !defined 
(ACE_LACKS_PRAGMA_ONCE) +#define ACE_LACKS_PRAGMA_ONCE +#endif /* ACE_LACKS_PRAGMA_ONCE */ + +#include "ace/Pair_T.h" +#include "ace/Hash_Map_Manager_T.h" + +// Forward declaration. +class ACE_Allocator; +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> +class ACE_Optimal_Cache_Map_Iterator; +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> +class ACE_Optimal_Cache_Map_Reverse_Iterator; +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> +class ACE_Optimal_Cache_Map_Manager +{ + // = TITLE + // Defines a abstraction which will take care of caching of a map. + // The map considered is the ACE_Hash_Map_Manager. + // + // = DESCRIPTION + // The Optimal_Cache_Map_Manager will prepare the map given into its + // custody for purging on demand. The strategy for caching is + // decided by the developer and provided to the Cache Manager + // who acts as a agent and communicates between the Map and the + // Strategy for purging entries from the map. + // To tap the optimal methods like find (key, value, entry) present + // in the ACE_Hash_Map_Manager, it is used as the default map. + // + // No locking mechanism provided since locking at this level isnt efficient. + // Locking has to be provided by the application. +public: + + // = Traits. + typedef KEY key_type; + typedef VALUE mapped_type; + + typedef ACE_TYPENAME CACHING_STRATEGY::ATTRIBUTES ATTRIBUTES; + typedef ACE_Pair<VALUE, ATTRIBUTES> CACHE_VALUE; + // The actual value mapped to the key in the cache. The <attributes> + // are used by the strategy and is transperant to the cache user. 
+ + typedef ACE_Hash_Map_Manager_Ex<KEY, ACE_Pair<VALUE, ATTRIBUTES>, HASH_KEY, COMPARE_KEYS, ACE_Null_Mutex> MAP; + typedef ACE_Hash_Map_Entry<KEY, ACE_Pair<VALUE, ATTRIBUTES> > CACHE_ENTRY; + + friend class ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>; + friend class ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>; + + // = ACE-style iterator typedefs. + typedef ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> ITERATOR; + typedef ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> REVERSE_ITERATOR; + + // = STL-style iterator typedefs. + typedef ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> iterator; + typedef ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> reverse_iterator; + + // = Initialization and termination methods. + + ACE_Optimal_Cache_Map_Manager (ACE_Allocator *alloc = 0, + ATTRIBUTES attributes = 0, + int purge_percent = 10); + // Initialize a <Optimal_Cache_Map_Manager> with the <ACE_DEFAULT_MAP_SIZE>. + + ACE_Optimal_Cache_Map_Manager (size_t size, + ACE_Allocator *alloc = 0, + ATTRIBUTES attributes = 0, + int purge_percent = 10 ); + // Initialize a <Optimal_Cache_Map_Manager> with <size> entries. + + ~ACE_Optimal_Cache_Map_Manager (void); + // Close down a <Optimal_Cache_Map_Manager> and release dynamically allocated + // resources. + + int open (size_t length = ACE_DEFAULT_MAP_SIZE, + ACE_Allocator *alloc = 0); + // Initialise a cache with size <length>. + + int close (void); + // Close down a cache and release dynamically allocated resources. + + int bind (const KEY &key, + const VALUE &value); + // Associate <key> with <value>. If <key> is already in the + // MAP then the ENTRY is not changed. 
Returns 0 if a new entry is + // bound successfully, returns 1 if an attempt is made to bind an + // existing entry, and returns -1 if failures occur. + + int bind (const KEY &key, + const VALUE &value, + ACE_Hash_Map_Entry<KEY, ACE_Pair<VALUE, ATTRIBUTES> > *&entry); + // Same as a normal bind, except the cache entry is also passed back + // to the caller. The entry in this case will either be the newly + // created entry, or the existing one. + + int find (const KEY &key, + VALUE &value); + // Loopkup entry<key,value> in the cache. + + int find (const KEY &key); + // Is <key> in the cache? + + int find (const KEY &key, + ACE_Hash_Map_Entry<KEY, ACE_Pair<VALUE, ATTRIBUTES> > *&entry); + // Obtain the entry when the find succeeds. + + int rebind (const KEY &key, + const VALUE &value); + // Reassociate the <key> with <value>. If the <key> already exists + // in the cache then returns 1, on a new bind returns 0 and returns + // -1 in case of any failures. + + int rebind (const KEY &key, + const VALUE &value, + VALUE &old_value); + // Reassociate <key> with <value>, storing the old value into the + // "out" parameter <old_value>. The function fails if <key> is not + // in the cache for caches that do not allow user specified keys. + // However, for caches that allow user specified keys, if the key is + // not in the cache, a new <key>/<value> association is created. + + int rebind (const KEY &key, + const VALUE &value, + KEY &old_key, + VALUE &old_value); + // Reassociate <key> with <value>, storing the old key and value + // into the "out" parameters <old_key> and <old_value>. The + // function fails if <key> is not in the cache for caches that do not + // allow user specified keys. However, for caches that allow user + // specified keys, if the key is not in the cache, a new <key>/<value> + // association is created. 
+ + int rebind (const KEY &key, + const VALUE &value, + ACE_Hash_Map_Entry<KEY, ACE_Pair<VALUE, ATTRIBUTES> > *&entry); + // Same as a normal rebind, except the cache entry is also passed back + // to the caller. The entry in this case will either be the newly + // created entry, or the existing one. + + int trybind (const KEY &key, + VALUE &value); + // Associate <key> with <value> if and only if <key> is not in the + // cache. If <key> is already in the cache, then the <value> parameter + // is overwritten with the existing value in the cache. Returns 0 if a + // new <key>/<value> association is created. Returns 1 if an + // attempt is made to bind an existing entry. This function fails + // for maps that do not allow user specified keys. + + int trybind (const KEY &key, + VALUE &value, + ACE_Hash_Map_Entry<KEY, ACE_Pair<VALUE, ATTRIBUTES> > *&entry); + // Same as a normal trybind, except the cache entry is also passed + // back to the caller. The entry in this case will either be the + // newly created entry, or the existing one. + + int unbind (const KEY &key); + // Remove <key> from the cache. + + int unbind (const KEY &key, + VALUE &value); + // Remove <key> from the cache, and return the <value> associated with + // <key>. + + int unbind (ACE_Hash_Map_Entry<KEY, ACE_Pair<VALUE, ATTRIBUTES> > *entry); + // Remove entry from map. + + int purge (void); + // Remove entries from the cache depending upon the strategy. + + size_t current_size (void); + // Return the current size of the cache. + + size_t total_size (void); + // Return the total size of the cache. + + void dump (void) const; + // Dumps the state of the object. + + // = STL styled iterator factory functions. + + ITERATOR begin (void); + ITERATOR end (void); + // Return forward iterator. + + REVERSE_ITERATOR rbegin (void); + REVERSE_ITERATOR rend (void); + // Return reverse iterator. 
+ +protected: + + ACE_Hash_Map_Manager_Ex<KEY, ACE_Pair<VALUE, ATTRIBUTES>, HASH_KEY, COMPARE_KEYS, ACE_Null_Mutex> &map (void); + // The map managed by the Optimal_Cache_Map_Manager. + + CACHING_STRATEGY &caching_strategy (void); + // The caching strategy used on the cache. + + ACE_Hash_Map_Manager_Ex<KEY, ACE_Pair<VALUE, ATTRIBUTES>, HASH_KEY, COMPARE_KEYS, ACE_Null_Mutex> map_; + // The underlying map which needs to be cached. + + CACHING_STRATEGY caching_strategy_; + // The strategy to be followed for caching entries in the map. +}; +//////////////////////////////////////////////////////////////////////////////// +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> +class ACE_Optimal_Cache_Map_Iterator +{ + // = TITLE + // Defines a iterator for the Optimal_Cache_Map_Manager. + // + // = DESCRIPTION + // Implementation to be provided by the iterator of the map + // managed by the ACE_Optimal_Cache_Map_Manager. + +public: + + // = Traits. + typedef ACE_TYPENAME CACHING_STRATEGY::ATTRIBUTES ATTRIBUTES; + typedef ACE_Pair <VALUE, ATTRIBUTES> CACHE_VALUE; + // The actual value mapped to the key in the cache. The <attributes> + // are used by the strategy and is transperant to the cache user. + + typedef ACE_Reference_Pair<KEY, VALUE> value_type; + typedef ACE_Hash_Map_Iterator_Ex<KEY, ACE_Pair<VALUE, ATTRIBUTES>, HASH_KEY, COMPARE_KEYS, ACE_Null_Mutex> IMPLEMENTATION; + // = Initialisation and termination methods. + + ACE_Optimal_Cache_Map_Iterator (const ACE_Hash_Map_Iterator_Ex<KEY, ACE_Pair<VALUE, ATTRIBUTES>, HASH_KEY, COMPARE_KEYS, ACE_Null_Mutex> &iterator_impl); + + ACE_Optimal_Cache_Map_Iterator (const ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &rhs); + // Copy constructor. + + ~ACE_Optimal_Cache_Map_Iterator (void); + + // = Iteration methods. 
+ + ACE_Optimal_Cache_Map_Iterator &operator= (const ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &rhs); + // assignment operator. + + int operator== (const ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &rhs); + int operator!= (const ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &rhs); + // Comparision operators. + + ACE_Reference_Pair<KEY, VALUE> operator* (void) const; + // Returns a reference to the internal element <this> is pointing to. + + // = STL styled iteration, compare, and reference functions. + + ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &operator++ (void); + // Prefix advance + + ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> operator++ (int); + // Postfix advance. + + ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &operator-- (void); + // Prefix reverse. + + ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> operator-- (int); + // Postfix reverse. + + ACE_Hash_Map_Iterator_Ex<KEY, ACE_Pair<VALUE, ATTRIBUTES>, HASH_KEY, COMPARE_KEYS, ACE_Null_Mutex> iterator_implementation (void) const; + // Returns the iterator of the internal map in the custody of the Optimal_Cache_Map_Manager. + + void dump (void) const; + // Dump the state of an object. + + ACE_ALLOC_HOOK_DECLARE; + // Declare the dynamic allocation hooks. + +protected: + ACE_Hash_Map_Iterator_Ex<KEY, ACE_Pair<VALUE, ATTRIBUTES>, HASH_KEY, COMPARE_KEYS, ACE_Null_Mutex> iterator_implementation_; + // The actual iterator which iterates internally on the map + // belonging to the Optimal_Cache_Map_Manager. 
+}; + +//////////////////////////////////////////////////////////////////////////////// +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> +class ACE_Optimal_Cache_Map_Reverse_Iterator +{ + // = TITLE + // Defines a reverse iterator for the Optimal_Cache_Map_Manager. + // + // = DESCRIPTION + // Implementation to be provided by the reverse iterator of the map + // managed by thr Optimal_Cache_Map_manager. + +public: + + // = Traits. + typedef ACE_TYPENAME CACHING_STRATEGY::ATTRIBUTES ATTRIBUTES; + typedef ACE_Pair <VALUE, ATTRIBUTES> CACHE_VALUE; + // The actual value mapped to the key in the cache. The <attributes> + // are used by the strategy and is transperant to the cache user. + + typedef ACE_Reference_Pair<KEY, VALUE> value_type; + typedef ACE_Hash_Map_Reverse_Iterator_Ex<KEY, ACE_Pair<VALUE, ATTRIBUTES>, HASH_KEY, COMPARE_KEYS, ACE_Null_Mutex> REVERSE_IMPLEMENTATION; + // = Initialisation and termination methods. + + ACE_Optimal_Cache_Map_Reverse_Iterator (const REVERSE_IMPLEMENTATION &iterator_impl); + + ACE_Optimal_Cache_Map_Reverse_Iterator (const ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &rhs); + // Copy constructor. + + ~ACE_Optimal_Cache_Map_Reverse_Iterator (void); + + // = Iteration methods. + + ACE_Optimal_Cache_Map_Reverse_Iterator &operator= (const ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &rhs); + // Assignment operator. + + int operator== (const ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &rhs); + int operator!= (const ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &rhs); + // Comparision operators. + + ACE_Reference_Pair<KEY, VALUE> operator* (void) const; + // Returns a reference to the internal element <this> is pointing to. + + // = STL styled iteration, compare, and reference functions. 
+ + ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &operator++ (void); + // Prefix advance + + ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> operator++ (int); + // Postfix advance. + + ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &operator-- (void); + // Prefix reverse. + + ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> operator-- (int); + // Postfix reverse. + + ACE_Hash_Map_Reverse_Iterator_Ex<KEY, ACE_Pair<VALUE, ATTRIBUTES>, HASH_KEY, COMPARE_KEYS, ACE_Null_Mutex> iterator_implementation (void) const; + // Returns the iterator of the internal map in the custody of the Optimal_Cache_Map_Manager. + + void dump (void) const; + // Dump the state of an object. + + ACE_ALLOC_HOOK_DECLARE; + // Declare the dynamic allocation hooks. + +protected: + ACE_Hash_Map_Reverse_Iterator_Ex<KEY, ACE_Pair<VALUE, ATTRIBUTES>, HASH_KEY, COMPARE_KEYS, ACE_Null_Mutex> reverse_iterator_implementation_; + // The actual iterator which iterates internally on the map + // belonging to the Optimal_Cache_Map_Manager. 
+}; + +#if defined (__ACE_INLINE__) +#include "Optimal_Cache_Map_Manager_T.i" +#endif /* __ACE_INLINE__ */ + +#if defined (ACE_TEMPLATES_REQUIRE_SOURCE) +#include "Optimal_Cache_Map_Manager_T.cpp" +#endif /* ACE_TEMPLATES_REQUIRE_SOURCE */ + +#if defined (ACE_TEMPLATES_REQUIRE_PRAGMA) +#pragma implementation "Optimal_Cache_Map_Manager_T.cpp" +#endif /* ACE_TEMPLATES_REQUIRE_PRAGMA */ + + +#endif /* OPTIMAL_CACHE_MAP_MANAGER_T_H */ diff --git a/examples/Web_Crawler/Optimal_Cache_Map_Manager_T.i b/examples/Web_Crawler/Optimal_Cache_Map_Manager_T.i new file mode 100644 index 00000000000..d0e30c78b74 --- /dev/null +++ b/examples/Web_Crawler/Optimal_Cache_Map_Manager_T.i @@ -0,0 +1,279 @@ +/* -*- C++ -*- */ +//$Id$ + +#include "Optimal_Cache_Map_Manager_T.h" + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::ACE_Optimal_Cache_Map_Manager (ACE_Allocator *alloc, + ATTRIBUTES attributes, + int purge_percent) + :map_ (alloc), + caching_strategy_ (map_, attributes, purge_percent) +{ +} + + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::ACE_Optimal_Cache_Map_Manager (size_t size, + ACE_Allocator *alloc, + ATTRIBUTES attributes, + int purge_percent) + :map_ (size, + alloc), + caching_strategy_ (map_, attributes, purge_percent) +{ +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::~ACE_Optimal_Cache_Map_Manager (void) +{ +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE int +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::open (size_t length, + 
ACE_Allocator *alloc) +{ + return this->map_.open (length, + alloc); +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE int +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::purge (void) +{ + ACE_DEBUG ((LM_DEBUG, "ACE_Optimal_Cache_Map_Manager::PURGE\n")); + return this->caching_strategy ().clear_cache (this->map ()); +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE size_t +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::current_size (void) +{ + return this->map_.current_size (); +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE size_t +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::total_size (void) +{ + return this->map_.total_size (); +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE int +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::close (void) +{ + return this->map_.close (); +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::MAP & +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::map (void) +{ + return this->map_; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE CACHING_STRATEGY & +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::caching_strategy (void) +{ + return this->caching_strategy_; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::ITERATOR 
+ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::begin (void) +{ + return ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::ITERATOR (this->map_.begin ()); +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::ITERATOR +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::end (void) +{ + return ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::ITERATOR (this->map_.end ()); +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::REVERSE_ITERATOR +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::rbegin (void) +{ + return ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::REVERSE_ITERATOR (this->map_.rbegin ()); +} +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::REVERSE_ITERATOR +ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::rend (void) +{ + return ACE_Optimal_Cache_Map_Manager<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::REVERSE_ITERATOR (this->map_.rend ()); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////// + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE +ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::ACE_Optimal_Cache_Map_Iterator (const ACE_Optimal_Cache_Map_Iterator <KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &rhs) + : 
iterator_implementation_ (rhs.iterator_implementation ()) +{ +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE +ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::~ACE_Optimal_Cache_Map_Iterator (void) +{ +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> & +ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::operator= (const ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &rhs) +{ + this->iterator_implementation_ = rhs.iterator_implementation (); + return *this; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE int +ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::operator== (const ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &rhs) +{ + return this->iterator_implementation_ == rhs.iterator_implementation (); +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE int +ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::operator!= (const ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &rhs) +{ + return this->iterator_implementation_ != rhs.iterator_implementation (); +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE ACE_Reference_Pair<KEY, VALUE> +ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::operator* (void) const +{ + value_type retv ((*this->iterator_implementation_).ext_id_, (*this->iterator_implementation_).int_id_.first ()); + return retv; +} + +template <class KEY, class VALUE, class HASH_KEY, 
class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE +ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> & +ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::operator++ (void) +{ + // Prefix advance: step the wrapped hash-map iterator, then yield + // ourselves. + this->iterator_implementation_.operator++ (); + return *this; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE +ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> +ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::operator++ (int) +{ + // Postfix advance: the caller must receive the position held + // *before* the step, so snapshot ourselves first, advance, and + // return the snapshot (returning *this would hand back the + // already-advanced position). + ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> retv = *this; + this->iterator_implementation_.operator++ (); + return retv; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE +ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> & +ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::operator-- (void) +{ + // Prefix reverse: step the wrapped hash-map iterator backwards, + // then yield ourselves. + this->iterator_implementation_.operator-- (); + return *this; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE +ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> +ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::operator-- (int) +{ + // Postfix reverse: same contract as postfix advance -- return the + // position held before the step. + ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> retv = *this; + this->iterator_implementation_.operator-- (); + return retv; +} + + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE void +ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::dump (void) const +{ + // Delegates to the wrapped iterator's dump (). + return this->iterator_implementation_.dump (); +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE +ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::ACE_Optimal_Cache_Map_Iterator (const ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, 
COMPARE_KEYS, CACHING_STRATEGY>::IMPLEMENTATION &iterator_impl) + : iterator_implementation_(iterator_impl) +{ +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::IMPLEMENTATION +ACE_Optimal_Cache_Map_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::iterator_implementation (void) const +{ + return this->iterator_implementation_; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////// + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE +ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::ACE_Optimal_Cache_Map_Reverse_Iterator (const ACE_Optimal_Cache_Map_Reverse_Iterator <KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &rhs) + : reverse_iterator_implementation_ (rhs.iterator_implementation ()) +{ +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE +ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::~ACE_Optimal_Cache_Map_Reverse_Iterator (void) +{ +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> & +ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::operator= (const ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &rhs) +{ + this->reverse_iterator_implementation_ = rhs.iterator_implementation (); + return *this; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE int +ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, 
CACHING_STRATEGY>::operator== (const ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &rhs) +{ + return this->reverse_iterator_implementation_ == rhs.iterator_implementation (); +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE int +ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::operator!= (const ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> &rhs) +{ + // Two reverse iterators differ when their wrapped iterators differ. + return this->reverse_iterator_implementation_ != rhs.iterator_implementation (); +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE ACE_Reference_Pair<KEY, VALUE> +ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::operator* (void) const +{ + // Expose only the key and the user's value: <int_id_> is a pair + // whose first () is handed out here, the rest stays internal. + value_type retv ((*this->reverse_iterator_implementation_).ext_id_, (*this->reverse_iterator_implementation_).int_id_.first ()); + return retv; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE +ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> & +ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::operator++ (void) +{ + // Prefix advance: step the wrapped reverse iterator, then yield + // ourselves. + this->reverse_iterator_implementation_.operator++ (); + return *this; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE +ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> +ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::operator++ (int) +{ + // Postfix advance: the caller must receive the position held + // *before* the step, so snapshot ourselves first, advance, and + // return the snapshot (returning *this would hand back the + // already-advanced position). + ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> retv = *this; + this->reverse_iterator_implementation_.operator++ (); + return retv; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE +ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, 
HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> & +ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::operator-- (void) +{ + this->reverse_iterator_implementation_.operator-- (); + return *this; +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE +ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY> +ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::operator-- (int) +{ + this->reverse_iterator_implementation_.operator-- (1); + return *this; +} + + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE void +ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::dump (void) const +{ + return this->reverse_iterator_implementation_.dump (); +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE +ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::ACE_Optimal_Cache_Map_Reverse_Iterator (const ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::REVERSE_IMPLEMENTATION &iterator_impl) + : reverse_iterator_implementation_(iterator_impl) +{ +} + +template <class KEY, class VALUE, class HASH_KEY, class COMPARE_KEYS, class CACHING_STRATEGY> ACE_INLINE ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::REVERSE_IMPLEMENTATION +ACE_Optimal_Cache_Map_Reverse_Iterator<KEY, VALUE, HASH_KEY, COMPARE_KEYS, CACHING_STRATEGY>::iterator_implementation (void) const +{ + return this->reverse_iterator_implementation_; +} + diff --git a/examples/Web_Crawler/Options.cpp b/examples/Web_Crawler/Options.cpp new file mode 100644 index 00000000000..6253fb3cc2c --- /dev/null +++ b/examples/Web_Crawler/Options.cpp @@ -0,0 +1,195 @@ +// $Id$ + +#include 
"ace/Get_Opt.h" +#include "URL_Addr.h" +#include "Options.h" + +ACE_RCSID(HTTP_1.1_Client, Options, "$Id$") + +int +Options::parse_args (int argc, char *argv[]) +{ + ACE_Get_Opt getopt (argc, argv, ASYS_TEXT ("df:h:i:l:rt:u:vo:p:")); + + ACE_LOG_MSG->open (argv[0]); + + this->hostname_ = "www.cs.wustl.edu"; + this->uri_ = "~kirthika/auto_purge_client.html"; + this->recurse_ = 1; //0; + this->debug_ = 0; + this->timeout_.sec (ACE_DEFAULT_TIMEOUT); + this->url_filter_ = 0; + this->verbose_ = 0; + this->order_ = "FIFO"; + this->port_no_ = ACE_DEFAULT_HTTP_PORT; + this->handle_limit_ = 64; + // The default is to make this limit as large as possible. + // int handle_limit = 10; + + for (int c; + (c = getopt ()) != EOF; + ) + switch (c) + { + case 'd': + this->debug_ = 1; + break; + case 'f': + this->url_filter_ = getopt.optarg; + break; + case 'h': + this->hostname_ = getopt.optarg; + break; + case 'i': + this->uri_ = getopt.optarg; + break; + case 'l': + this->handle_limit_ = ACE_OS::atoi (getopt.optarg); + break; + case 'r': + this->recurse_ = 1; + break; + case 't': + this->timeout_.sec (ACE_OS::atoi (getopt.optarg)); + break; + case 'u': + { + this->hostname_ = getopt.optarg; + char *s = ACE_OS::strchr (getopt.optarg, '/'); + if (s != 0) + { + this->uri_ = s + 1; + *s = '\0'; + } + else + ACE_ERROR ((LM_ERROR, + "invalid URL %s\n", + getopt.optarg)); + } + break; + case 'v': + this->verbose_ = 1; + break; + case 'o': + { + this->order_ = getopt.optarg; + } + break; + case 'p': + this->port_no_ = ACE_OS::atoi (getopt.optarg); + break; + default: + ACE_ERROR ((LM_ERROR, + "usage: %n [-d] [-f filter] [-h hostname]" + " [-l handle-limit] [-r] [-t timeout] [-u URI]" + " [-v]\n%a", + 1)); + + /* NOTREACHED */ + } + + // Don't bother checking the return value since this is just a + // "hint" and isn't portable to all OS platforms. 
+ + // ACE::set_handle_limit (handle_limit); + /* ACE_DEBUG ((LM_DEBUG, "Changing rlimit\n")); + struct rlimit rl; + if (getrlimit (RLIMIT_NOFILE, &rl) == -1) + cout << "getrlimit: errno = "<< errno <<endl; + + rl.rlim_cur = handle_limit; + if (setrlimit (RLIMIT_NOFILE, &rl) == -1) + cout << "setrlimit: errno = "<< errno <<endl; + ACE_ERROR_RETURN ((LM_ERROR, + "%p \n"), + -1); + if (getrlimit (RLIMIT_NOFILE, &rl) == -1) + cout << "getrlimit: errno = "<< errno <<endl; + else + cout << "limit "<< rl.rlim_cur<<endl; + ACE_DEBUG ((LM_DEBUG, "Changed rlimit\n")); */ + return 0; +} + +int +Options::port_no (void) const +{ + return this->port_no_; +} + +int +Options::recurse (void) const +{ + return this->recurse_; +} + +const ACE_Time_Value * +Options::timeout (void) const +{ + return &this->timeout_; +} + +int +Options::debug (void) const +{ + return this->debug_; +} + +int +Options::verbose (void) const +{ + return this->verbose_; +} + +LPCTSTR +Options::order (void) const +{ + return this->order_; +} +LPCTSTR +Options::hostname (void) const +{ + return this->hostname_; +} + +LPCTSTR +Options::path_name (void) const +{ + return this->uri_; +} + +LPCTSTR +Options::url_filter (void) const +{ + return this->url_filter_; +} + +Command_Processor * +Options::command_processor (void) const +{ + return this->command_processor_; +} + +void +Options::command_processor (Command_Processor *cp) +{ + this->command_processor_ = cp; +} + +URL_Visitor * +Options::visitor (void) const +{ + return this->visitor_; +} + +void +Options::visitor (URL_Visitor *v) +{ + this->visitor_ = v; +} + +int +Options::handle_limit (void) +{ + return this->handle_limit_; +} diff --git a/examples/Web_Crawler/Options.h b/examples/Web_Crawler/Options.h new file mode 100644 index 00000000000..e7f38b74d71 --- /dev/null +++ b/examples/Web_Crawler/Options.h @@ -0,0 +1,123 @@ +/* -*- C++ -*- */ +// $Id$ + +// ============================================================================ +// +// = LIBRARY +// 
apps/Web +// +// = FILENAME +// Options.h +// +// = AUTHOR +// Douglas C. Schmidt <schmidt@cs.wustl.edu> +// +// ============================================================================ + +#ifndef _OPTIONS_H +#define _OPTIONS_H + +#include "ace/Singleton.h" +#include "ace/Synch.h" + +#if !defined (ACE_LACKS_PRAGMA_ONCE) +#define ACE_LACKS_PRAGMA_ONCE +#endif /* ACE_LACKS_PRAGMA_ONCE */ + +// Forward decls. +class Command_Processor; +class URL_Visitor; + +class Options +{ + // = TITLE + // Maintains the global options. + // + // = DESCRIPTION + // This class is converted into a Singleton by the + // <ACE_Singleton> template. +public: + int parse_args (int argc, char *argv[]); + // Parse the command-line arguments and initialize the options. + + int recurse (void) const; + // If non-0 and the link is an HTML file then recursively check all + // links that are embedded in the body of file. + + LPCTSTR hostname (void) const; + // Return the hostname of the initial Web server. + + LPCTSTR path_name (void) const; + // Return the initial URI. + + LPCTSTR url_filter (void) const; + // String used to filter out which URLs to validate. + + int debug (void) const; + // Are we debugging? + + int verbose (void) const; + // Are we being verbose? + + LPCTSTR order (void) const; + // Which order? LIFO|FIFO?? + + int port_no (void) const; + // Port # + + const ACE_Time_Value *timeout (void) const; + // Return the timeout used to prevent hanging on <recv> and + // <connect> calls to broken servers. + + // = Get/set the <Command_Processor>. + Command_Processor *command_processor (void) const; + void command_processor (Command_Processor *); + + // = Get/set the <URL_Visitor>. + URL_Visitor *visitor (void) const; + void visitor (URL_Visitor *); + + // Get the handle_limit. + int handle_limit (void); +private: + int recurse_; + // Are we recursving. + + LPCTSTR hostname_; + // Initial Web server name. + + LPCTSTR uri_; + // Initial URI name. + + int debug_; + // Are we debugging? 
+ + int verbose_; + // Are we being verbose? + + LPCTSTR order_; + // Whether the URLs are traversed in FIFO or LIFO order. + + ACE_Time_Value timeout_; + // Timeout on <recv> and <connect> to broken Web servers. + + LPCTSTR url_filter_; + // String used to filter out which URLs to validate. + + Command_Processor *command_processor_; + // Pointer to the Command_Processor. + + URL_Visitor *visitor_; + // Pointer to the <URL_Visitor>. + + int port_no_; + // Port no. + + int handle_limit_; + // The limit of the number of descriptors to be given for this process. +}; + +// Typedef an Options Singleton. +typedef ACE_Singleton <Options, ACE_Null_Mutex> OPTIONS; + +#endif /* _OPTIONS_H */ diff --git a/examples/Web_Crawler/README b/examples/Web_Crawler/README new file mode 100644 index 00000000000..4f81809173d --- /dev/null +++ b/examples/Web_Crawler/README @@ -0,0 +1,25 @@ +Web Crawler Kirthika Parameswaran +----------- + +The Web Crawler follows the HTTP_1.1 protocol. + +This Crawler crawls in either FIFO or LIFO order over the URLs +now stored in a ACE_Unbounded_Queue. The Command Processor pattern is +used in this example. + +Also the auto-purging feature where connections are removed from the cache +when the process runs out of file descriptors, is added to this example. + +[Use the -l option to set the handle limit]. 
+ +Run: +--- + + +> make + +> main -r -u www.cs.wustl.edu/~kirthika/test.html -o LIFO + +or + +> main -r -u www.cs.wustl.edu/~kirthika/test.html -o FIFO diff --git a/examples/Web_Crawler/URL.cpp b/examples/Web_Crawler/URL.cpp new file mode 100644 index 00000000000..89512cca85b --- /dev/null +++ b/examples/Web_Crawler/URL.cpp @@ -0,0 +1,39 @@ +// $Id$ + +#include "URL.h" + +ACE_RCSID(HTTP_1.1_Client, URL, "$Id$") + +Mem_Map_Stream & +URL::stream (void) +{ + return this->stream_; +} + +URL::~URL (void) +{ +} + +const URL_Status & +URL::reply_status (void) +{ + return this->reply_status_; +} + +void +URL::reply_status (const URL_Status &rs) +{ + this->reply_status_ = rs; +} + +const ACE_CString & +URL::content_type (void) +{ + return this->content_type_; +} + +void +URL::content_type (const ACE_CString &ct) +{ + this->content_type_ = ct; +} diff --git a/examples/Web_Crawler/URL.h b/examples/Web_Crawler/URL.h new file mode 100644 index 00000000000..5b1df98be71 --- /dev/null +++ b/examples/Web_Crawler/URL.h @@ -0,0 +1,78 @@ +/* -*- C++ -*- */ +// $Id$ + +// ============================================================================ +// +// = LIBRARY +// apps/Web +// +// = FILENAME +// URL.h +// +// = AUTHOR +// Douglas C. Schmidt <schmidt@cs.wustl.edu> +// +// ============================================================================ + +#ifndef _URL_H +#define _URL_H + +#include "Mem_Map_Stream.h" +#include "URL_Addr.h" +#include "URL_Status.h" + +#if !defined (ACE_LACKS_PRAGMA_ONCE) +#define ACE_LACKS_PRAGMA_ONCE +#endif /* ACE_LACKS_PRAGMA_ONCE */ + +// Forward declaration. +class URL_Visitor; + +class URL +{ + // = TITLE + // Base class for a URL. + // + // = DESCRIPTION + // This class plays a role in the Visitor pattern. +public: + virtual ~URL (void); + // Destructor. + + virtual int accept (URL_Visitor *visitor) = 0; + // Accept the visitor, which will then perform a particular + // visitation strategy on the URL. 
This method is part of the + // Visitor pattern. + + virtual int send_request (void) = 0; + // Send a <GET> command to fetch the contents in the URI from the + // server. + + virtual const ACE_URL_Addr &url_addr (void) const = 0; + // Returns the URL that we represent. + + virtual Mem_Map_Stream &stream (void); + // Returns the <Mem_Map_Stream>. + + // = Get/set the reply status. + virtual const URL_Status &reply_status (void); + virtual void reply_status (const URL_Status &); + + // = Get/set the reply status. + virtual const ACE_CString &content_type (void); + virtual void content_type (const ACE_CString &); + + + +private: + URL_Status reply_status_; + // Reply status of the URL. + + ACE_CString content_type_; + // Content-type of the URL. + + Mem_Map_Stream stream_; + // Contents of the stream. +}; + +#endif /* _URL_H */ diff --git a/examples/Web_Crawler/URL_Addr.cpp b/examples/Web_Crawler/URL_Addr.cpp new file mode 100644 index 00000000000..f0e6e52a336 --- /dev/null +++ b/examples/Web_Crawler/URL_Addr.cpp @@ -0,0 +1,229 @@ +// $Id$ +#include "URL_Addr.h" + +ACE_RCSID(HTTP_1.1_Client, URL_Addr, "$Id$") + +ACE_URL_Addr::ACE_URL_Addr (void) + : path_name_ (0), + addr_string_ (0), + addr_string_len_ (0) +{ +} + +int +ACE_URL_Addr::addr_to_string (LPTSTR s, + size_t size, + int ipaddr_format) const +{ + size_t total_len = + ACE_OS::strlen (ipaddr_format == 0 ? + this->get_host_name () : + this->get_host_addr ()) + + ACE_OS::strlen ("65536") // Assume the max port number. + + ACE_OS::strlen (this->get_path_name ()) + + sizeof (':') + + sizeof ('/') + + sizeof ('\0'); // For trailing '\0'. + + if (size < total_len) + return -1; + else + { + ACE_OS::sprintf (s, ASYS_TEXT ("%s:%d/%s"), + ASYS_WIDE_STRING (ipaddr_format == 0 + ? 
this->get_host_name () + : this->get_host_addr ()), + this->get_port_number (), + this->get_path_name ()); + return 0; + } +} + +LPCTSTR +ACE_URL_Addr::addr_to_string (int ipaddr_format) const +{ + ACE_URL_Addr *this_ptr = ACE_const_cast (ACE_URL_Addr *, + this); + + size_t size = + ACE_OS::strlen (ipaddr_format == 0 ? + this->get_host_name () : + this->get_host_addr ()) + + ACE_OS::strlen ("65536") // Assume the max port number. + + ACE_OS::strlen (this->get_path_name ()) + + sizeof (':') + + sizeof ('/') + + sizeof ('\0'); // For trailing '\0'. + + if (size > this->addr_string_len_) + { + ACE_ALLOCATOR_RETURN (this_ptr->addr_string_, + (LPTSTR) ACE_OS::realloc ((void *) this->addr_string_, + size), + 0); + this_ptr->addr_string_len_ = size; + } + ACE_OS::sprintf (this->addr_string_, + ASYS_TEXT ("%s:%d/%s"), + ASYS_WIDE_STRING (ipaddr_format == 0 + ? this->get_host_name () + : this->get_host_addr ()), + this->get_port_number (), + this->get_path_name ()); + return this->addr_string_; +} + +int +ACE_URL_Addr::string_to_addr (LPCTSTR s) +{ + int result; + LPTSTR t; + + // Need to make a duplicate since we'll be overwriting the string. + ACE_ALLOCATOR_RETURN (t, + ACE_OS::strdup (s), + -1); + + + // First split off the path_name. + + LPTSTR path_name = ACE_OS::strchr (t, '/'); + LPCTSTR name = "index.html"; + if (path_name != 0) + { + if (ACE_OS::strlen (path_name + 1) > 0) + name = path_name + 1; + + *path_name = '\0'; + } + + ACE_ALLOCATOR_RETURN (this->path_name_, + // Skip over '/' + ACE_OS::strdup (name), + -1); + + // Now handle the host address and port number. + LPTSTR port_number = ACE_OS::strchr (t, ':'); + + if (port_number == 0) + { + // Assume it's an ip-address or ip-number. 
+ result = this->ACE_INET_Addr::set (ACE_DEFAULT_HTTP_PORT, + t); + } + else + { + *port_number = '\0'; + u_short port = (u_short) ACE_OS::atoi (port_number + 1); // Skip over ':' + result = this->ACE_INET_Addr::set (port, t); + } + + ACE_OS::free (ACE_MALLOC_T (t)); + return result; +} + +ACE_URL_Addr::ACE_URL_Addr (const ACE_URL_Addr &addr) + : path_name_ (0), + addr_string_ (0), + addr_string_len_ (0) +{ + if (this->set (addr) == -1) + ACE_ERROR ((LM_ERROR, + ASYS_TEXT ("%p\n"), + ASYS_TEXT ("ACE_URL_Addr::ACE_URL_Addr"))); +} + +int +ACE_URL_Addr::set (const ACE_URL_Addr &addr) +{ + ACE_OS::free (ACE_reinterpret_cast (void *, + ACE_const_cast (char *, + this->path_name_))); + ACE_OS::free (ACE_reinterpret_cast (void *, + ACE_const_cast (char *, + this->addr_string_))); + if (this->ACE_INET_Addr::set (addr) == -1) + return -1; + else + { + if (addr.path_name_) + ACE_ALLOCATOR_RETURN (this->path_name_, + ACE_OS::strdup (addr.path_name_), + -1); + if (addr.addr_string_) + ACE_ALLOCATOR_RETURN (this->addr_string_, + ACE_OS::strdup (addr.addr_string_), + -1); + this->addr_string_len_ = + addr.addr_string_len_; + return 0; + } +} + +void +ACE_URL_Addr::operator= (const ACE_URL_Addr &addr) +{ + if (this->set (addr) == -1) + ACE_ERROR ((LM_ERROR, + ASYS_TEXT ("%p\n"), + ASYS_TEXT ("ACE_URL_Addr::ACE_URL_Addr"))); +} + +u_long +ACE_URL_Addr::hash (void) const +{ + u_long result = this->ACE_INET_Addr::hash () + + ACE::hash_pjw (this->get_path_name ()); + + return result; +} + +int +ACE_URL_Addr::operator== (const ACE_URL_Addr &addr) const +{ + return ACE_OS::strcmp (addr.get_path_name (), + this->get_path_name ()) == 0 + && addr.get_port_number () == this->get_port_number () + && addr.get_ip_address () == this->get_ip_address (); +} + +int +ACE_URL_Addr::operator!= (const ACE_URL_Addr &addr) const +{ + return !(*this == addr); +} + +ACE_URL_Addr::ACE_URL_Addr (LPCTSTR host_name, + LPCTSTR path_name, + u_short port) + : ACE_INET_Addr (port, host_name), + path_name_ 
(ACE_OS::strdup (path_name)), + addr_string_ (0), + addr_string_len_ (0) +{ +} + +LPCTSTR +ACE_URL_Addr::get_path_name (void) const +{ + return this->path_name_; +} + +ACE_URL_Addr::~ACE_URL_Addr (void) +{ + ACE_OS::free (ACE_reinterpret_cast (void *, + ACE_const_cast (LPTSTR, + this->path_name_))); + ACE_OS::free (ACE_reinterpret_cast (void *, + ACE_const_cast (LPTSTR, + this->addr_string_))); + this->path_name_ = 0; +} + +int +ACE_URL_Addr::destroy (void) +{ + // Commit suicide. + delete this; + return 0; +} diff --git a/examples/Web_Crawler/URL_Addr.h b/examples/Web_Crawler/URL_Addr.h new file mode 100644 index 00000000000..c67772f243b --- /dev/null +++ b/examples/Web_Crawler/URL_Addr.h @@ -0,0 +1,108 @@ +/* -*- C++ -*- */ +// $Id$ + +// ============================================================================ +// +// = LIBRARY +// ace +// +// = FILENAME +// URL_Addr.h +// +// = AUTHOR +// Douglas C. Schmidt <schmidt@cs.wustl.edu> +// +// ============================================================================ + +#ifndef ACE_URL_ADDR_H +#define ACE_URL_ADDR_H + +#include "ace/INET_Addr.h" + +#if !defined (ACE_LACKS_PRAGMA_ONCE) +#define ACE_LACKS_PRAGMA_ONCE +#endif /* ACE_LACKS_PRAGMA_ONCE */ + +class ACE_URL_Addr : public ACE_INET_Addr +{ + // = TITLE + // Defines a URL address family address format. +public: + // = Initialization and termination methods. + ACE_URL_Addr (void); + // Constructor. + + ACE_URL_Addr (LPCTSTR host_name, + LPCTSTR path_name, + u_short port = ACE_DEFAULT_HTTP_PORT); + + ACE_URL_Addr (const ACE_URL_Addr &addr); + // Copy constructor. + + int set (const ACE_URL_Addr &addr); + // Essentially the copy constructor. + + virtual int string_to_addr (LPCTSTR address); + // Initializes an <ACE_URL_Addr> from the <address>, which can be + // "ip-number:port-number/path-name" (e.g., + // "tango.cs.wustl.edu:1234/~schmidt/" "ip-number:port-number/path-name" + // (e.g., "128.252.166.57:1234/~schmidt"). 
If there is no ':' in + // the <address> it is assumed to be an ip-number or ip-address + // number, with the port number <ACE_DEFAULT_HTTP_PORT>. + + virtual int addr_to_string (LPTSTR s, + size_t size, + int ipaddr_format = 1) const; + // Transform the current <ACE_INET_Addr> address into string format. + // If <ipaddr_format> is non-0 this produces + // "ip-number:port-number/path-name" (e.g., + // "128.252.166.57:80/~schmidt/"), whereas if <ipaddr_format> is 0 + // this produces "ip-name:port-number/path-name" (e.g., + // "tango.cs.wustl.edu:80/~schmidt/"). Returns -1 if the <size> of + // the buffer <s> is too small, else 0. + + virtual LPCTSTR addr_to_string (int ipaddr_format = 1) const; + // Transform the current <ACE_INET_Addr> address into string format. + // If <ipaddr_format> is non-0 this produces + // "ip-number:port-number/path-name" (e.g., + // "128.252.166.57:80/~schmidt/"), whereas if <ipaddr_format> is 0 + // this produces "ip-name:port-number/path-name" (e.g., + // "tango.cs.wustl.edu:80/~schmidt/"). Uses dynamic memory, which + // is allocated on demand and deallocated when the object is + // destroyed. Returns -1 if dynamic memory fails, else 0. + // NOTE(review): this overload returns a string pointer, so the + // "-1 ... else 0" wording cannot apply literally -- confirm the + // failure value against the implementation. + + void operator= (const ACE_URL_Addr &addr); + // Assignment operator. + + ~ACE_URL_Addr (void); + // Destructor. + + int operator == (const ACE_URL_Addr &SAP) const; + // Compare two addresses for equality. The addresses are considered + // equal if they contain the same IP address, port number, and path + // name. + + int operator != (const ACE_URL_Addr &SAP) const; + // Compare two addresses for inequality. + + virtual u_long hash (void) const; + // Computes and returns hash value. + + LPCTSTR get_path_name (void) const; + // Return the path name. + + int destroy (void); + // Commit suicide. +private: + LPTSTR path_name_; + // Our path name. + + LPTSTR addr_string_; + // The dynamically allocated address string that's used for the + // <addr_to_string> method. 
+ + size_t addr_string_len_; + // Current length of the <addr_string_> +}; + +#endif /* ACE_URL_ADDR_H */ diff --git a/examples/Web_Crawler/URL_Status.cpp b/examples/Web_Crawler/URL_Status.cpp new file mode 100644 index 00000000000..eee2a0ed875 --- /dev/null +++ b/examples/Web_Crawler/URL_Status.cpp @@ -0,0 +1,40 @@ +/* -*- C++ -*- */ +// $Id$ + +#include "URL_Status.h" + +ACE_RCSID(HTTP_1.1_Client, URL_Status, "$Id$") + +URL_Status::URL_Status (STATUS_CODE code) + : status_ (code) +{ +} + +URL_Status::URL_Status (const URL_Status &s) + : status_ (s.status_) +{ +} + +URL_Status::STATUS_CODE +URL_Status::status (void) const +{ + return this->status_; +} + +void +URL_Status::status (int s) +{ + this->status_ = URL_Status::STATUS_CODE (s); +} + +void +URL_Status::status (URL_Status::STATUS_CODE s) +{ + this->status_ = s; +} + +int URL_Status::destroy (void) +{ + delete this; + return 0; +} diff --git a/examples/Web_Crawler/URL_Status.h b/examples/Web_Crawler/URL_Status.h new file mode 100644 index 00000000000..fa8186d6e9a --- /dev/null +++ b/examples/Web_Crawler/URL_Status.h @@ -0,0 +1,61 @@ +/* -*- C++ -*- */ +// $Id$ + +// ============================================================================ +// +// = LIBRARY +// apps/Web +// +// = FILENAME +// URL_Status.h +// +// = AUTHOR +// Douglas C. 
Schmidt <schmidt@cs.wustl.edu> +// +// ============================================================================ + +#ifndef _URL_STATUS_H +#define _URL_STATUS_H + +#include "ace/OS.h" + +#if !defined (ACE_LACKS_PRAGMA_ONCE) +# pragma once +#endif /* ACE_LACKS_PRAGMA_ONCE */ + +class URL_Status +{ + // = TITLE + // Holds the status code recorded for a URL. +public: + enum STATUS_CODE + { + STATUS_OK = 200, + STATUS_CREATED = 201, + STATUS_ACCEPTED = 202, + STATUS_NO_CONTENT = 204, + STATUS_MOVED_PERMANENTLY = 301, + STATUS_MOVED_TEMPORARILY = 302, + STATUS_NOT_MODIFIED = 304, + STATUS_BAD_REQUEST = 400, + STATUS_UNAUTHORIZED = 401, + STATUS_FORBIDDEN = 403, + STATUS_NOT_FOUND = 404, + STATUS_INTERNAL_SERVER_ERROR = 500, + STATUS_NOT_IMPLEMENTED = 501, + STATUS_BAD_GATEWAY = 502, + STATUS_SERVICE_UNAVAILABLE = 503, + STATUS_INSUFFICIENT_DATA = 399 + }; + + URL_Status (STATUS_CODE = STATUS_INSUFFICIENT_DATA); + URL_Status (const URL_Status &); + + STATUS_CODE status (void) const; + void status (int); + void status (STATUS_CODE); + int destroy (void); +private: + STATUS_CODE status_; +}; + +#endif /* _URL_STATUS_H */ diff --git a/examples/Web_Crawler/URL_Visitor.cpp b/examples/Web_Crawler/URL_Visitor.cpp new file mode 100644 index 00000000000..e1943d75782 --- /dev/null +++ b/examples/Web_Crawler/URL_Visitor.cpp @@ -0,0 +1,604 @@ +// $Id$ + +#include "URL_Visitor.h" +#include "Command_Processor.h" + +ACE_RCSID(HTTP_1.1_Client, URL_Visitor, "$Id$") + +URL_Processing_Strategy::URL_Processing_Strategy (URL &url, + URL_Iterator &iterator) + : url_ (url), + iterator_ (iterator) +{ +} + +int +URL_Processing_Strategy::destroy (void) +{ + // Commit suicide. + delete this; + return 0; +} + +URL_Download_Strategy::URL_Download_Strategy (URL &url, + URL_Iterator &iterator) + : URL_Processing_Strategy (url, iterator) +{ +} + +int +URL_Download_Strategy::execute (void) +{ + ACE_CString buffer; + + // Extract all the contents of the Stream and print them to the + // file. 
+ while (this->iterator_.next (buffer) != 0) + ACE_DEBUG ((LM_DEBUG, + "%s", + buffer.c_str ())); + + return 0; +} + +HTTP_Header_Processing_Strategy::HTTP_Header_Processing_Strategy (URL &url, + URL_Iterator &iterator) + : URL_Processing_Strategy (url, iterator) +{ +} + +int +HTTP_Header_Processing_Strategy::execute (void) +{ + ACE_DEBUG ((LM_DEBUG, "HEADER\n")); + // Set the get() position.Necessary since later a peek is done. + if (this->url_.stream ().get_char () == 0) + ACE_ERROR_RETURN ((LM_ERROR, + "%p\n","Header Not Found"), + -1); + char line_buf[BUFSIZ + 1]; + ACE_CString line (line_buf); + // Get the lines in the header iteratively and check for status info. + int result = 1, i = 0; + for (i = 0, result = this->iterator_.next (line); + result > 0; + ++i, result = this->iterator_.next (line)) + { + if (i == 0) + { + ACE_DEBUG ((LM_DEBUG, "LINE:%s", line.c_str ())); + // Assuming that the status-no is a space away. + int status_index = line.find ("HTTP", 0); + ACE_CString status = line.substring (status_index + 9, //HTTP/1.1 200 + 3); + // DONE ONLY FOR TEMP USE AS OF NOW THE HTTP_SERVER DOESNT SEND AN HEADER. + // ACE_CString status_buf ("200"); + // status.set (status_buf.c_str (), 1); + + ACE_DEBUG ((LM_DEBUG, "STATUS %s\n", status.c_str ())); + URL_Status *url_status = 0; + ACE_NEW_RETURN (url_status, + URL_Status, + 0); + Auto_Destroyer<URL_Status> url_status_ptr (url_status); + url_status_ptr->status (ACE_OS::atoi (status.c_str ())); + this->url_.reply_status (**url_status_ptr); + // Invalid url. 
+ if (url_status_ptr->status () != 200) + return -1; + } + else + { + + if (line.find ("text/html") >= 0) + { + ACE_CString url_content_type("text/html"); + this->url_.content_type (url_content_type); + } + } + } + return 0; + +} + +HTML_Body_Validation_Strategy::HTML_Body_Validation_Strategy (URL &url, + URL_Iterator &iterator, + URL_Validation_Visitor &context) + : URL_Processing_Strategy (url, iterator), + visitor_context_ (context) +{ +} + +int +HTML_Body_Validation_Strategy::execute (void) +{ + // The strings below start out empty and are filled in by <set> or + // by the iterator; constructing them from uninitialized stack + // buffers would read indeterminate memory. + ACE_CString host_name; + host_name.set (url_.url_addr ().get_host_name (),1); + + // All to facilitate relative paths + ACE_CString prev_location; + + prev_location.set (this->url_.url_addr ().get_path_name (), + ACE_OS::strlen (this->url_.url_addr ().get_path_name ()), + 1); + int index = prev_location.rfind ('/', prev_location.length ()); + ACE_CString str = prev_location.substring (0, index + 1); + prev_location.set (str.c_str (), 1); + + // Note: prev_location always ends with '/' + if (prev_location[0] != '/') + prev_location = "/" + prev_location; + + // Build the url portion which can be attached to the relative paths. + prev_location = host_name + prev_location; + + ACE_CString url; + + while (this->iterator_.next (url) > 0) + { + // Check for relative urls.Strip out "http://" if its there. + if (url.find ("http") < 0) + { + if (url[0] == '.' && url[1] == '.') + { + url.set (&url[3], 1); + int i = prev_location.rfind ('/', prev_location.length () - 1); + prev_location = prev_location.substring (0, i+1); + } + if (url[0] == '.' && url[1] == '/') + url.set (&url[2], 1); + + url = prev_location + url; + } + else + url.set (&url[7], 1); + // Double slash at the end works!e.g seista.cs.wustl.edu/~kirthika// + if (url.find (".html") < 0) + url = url + "/"; + + // Create the new URL address. 
+ ACE_URL_Addr *url_addr; + ACE_NEW_RETURN (url_addr, + ACE_URL_Addr, + 0); + Auto_Destroyer<ACE_URL_Addr> url_addr_ptr (url_addr); + if (url_addr_ptr->string_to_addr (url.c_str ()) == 0) + { + HTTP_URL *http_url; + ACE_NEW_RETURN (http_url, + HTTP_URL (**url_addr_ptr, + ACE_dynamic_cast (HTTP_URL *, + &this->url_)), + 0); + URL_Command *url_command; + ACE_NEW_RETURN (url_command, + URL_Command (http_url), + 0); + + OPTIONS::instance ()->command_processor ()->insert (url_command); + } + } + return 0; +} + +URL_Iterator * +URL_Validation_Visitation_Strategy_Factory::make_header_iterator (void) +{ + URL_Iterator *i; + ACE_NEW_RETURN (i, + HTTP_Header_Iterator (*this->url_), + 0); + return i; +} + +URL_Iterator * +URL_Validation_Visitation_Strategy_Factory::make_body_iterator (void) +{ + URL_Iterator *i; + ACE_NEW_RETURN (i, + HTML_Body_Iterator (*this->url_), + 0); + return i; +} + +URL_Processing_Strategy * +URL_Validation_Visitation_Strategy_Factory::make_header_strategy (URL_Iterator &iterator) +{ + URL_Processing_Strategy *ps; + ACE_NEW_RETURN (ps, + HTTP_Header_Processing_Strategy (*this->url_, + iterator), + 0); + return ps; +} + +URL_Processing_Strategy * +URL_Validation_Visitation_Strategy_Factory::make_body_strategy (URL_Iterator &iterator) +{ + URL_Processing_Strategy *ps; + ACE_NEW_RETURN (ps, + HTML_Body_Validation_Strategy (*this->url_, + iterator, + this->visitor_context_), + 0); + return ps; +} + +int +URL_Validation_Visitation_Strategy_Factory::destroy (void) +{ + // Commit suicide. 
+ delete this; + return 0; +} + +URL_Validation_Visitor::URL_Validation_Visitor (void) +{ + ACE_DEBUG ((LM_DEBUG, "URL_Validation_Visitor_Factory::Strategy Con created\n")); + ACE_NEW (this->strat_connector_, + STRAT_CONNECTOR(0, + &creation_strategy_, + &caching_connect_strategy_, + &activation_strategy_)); + // The format string and its argument must be separate macro + // arguments; without the comma the two literals were concatenated + // and the conversions had nothing to consume. + if (strat_connector_ == 0) + ACE_ERROR ((LM_ERROR, + "%p\n", + "strategy connector creation failed")); + + +} + +URL_Validation_Visitor::~URL_Validation_Visitor (void) +{ +} + +URL_Validation_Visitor::URL_CACHE & +URL_Validation_Visitor::url_cache (void) +{ + return this->url_cache_; +} + +int +URL_Validation_Visitor::in_cache (const ACE_URL_Addr &url_addr) +{ + URL_Status reply_status (URL_Status::STATUS_CODE (1)); + + if (this->url_cache_.find (url_addr, reply_status) == 0) + { + ACE_DEBUG ((LM_DEBUG, + "status %d for URL %s (cached)\n", + reply_status.status (), + url_addr.addr_to_string (0))); + + // Invalid status. + if (reply_status.status () != 200) + return -1; + + return 1; + } + else + return 0; +} + +URL_Visitation_Strategy_Factory * +URL_Validation_Visitor::make_visitation_strategy_factory (URL &url) +{ + // Since this is HTTP 1.1 we'll need to establish a connection + // only once. Trying for relative paths. + // if (this->url_addr ().get_hostname () != url_hostname) + // { + if (url.stream ().open (this->strat_connector_, + url.url_addr ()) == -1) + return 0; + // } + // See if we can get connected and send the GET request via the + // <HTTP_URL>. + int result = url.send_request (); + if (result == -1) + { + ACE_ERROR ((LM_ERROR, + "%p\n", + "send_request")); + if (this->url_cache_.bind (url.url_addr (), + URL_Status (URL_Status::STATUS_SERVICE_UNAVAILABLE)) == -1) + ACE_ERROR ((LM_ERROR, + "%p\n", + "bind")); + return 0; + } + // @@ Here's where we could check to see if the <url> was HTTP or + // FTP, etc. But for now we'll just assume that everything is an + // HTTP URL. 
+ else + { + + URL_Visitation_Strategy_Factory *vs; + ACE_NEW_RETURN (vs, + URL_Validation_Visitation_Strategy_Factory (&url, + *this), + 0); + return vs; + } +} + +int +URL_Validation_Visitor::destroy (void) +{ + delete this->strat_connector_; + // Commit suicide. + delete this; + return 0; +} + +int +URL_Validation_Visitor::visit (HTTP_URL &http_url) +{ + int result = this->in_cache (http_url.url_addr ()); + /* if (result == -1) + { + ACE_DEBUG ((LM_DEBUG, + "Invalid URL:%s (cached)\n", + http_url.url_addr().addr_to_string (0))); + return 0; + }*/ + if (result == 0) + { + Auto_Destroyer <URL_Visitation_Strategy_Factory> vs (this->make_visitation_strategy_factory (http_url)); + + if (*vs == 0) + ACE_ERROR_RETURN ((LM_ERROR, + "%p\n", + "make_visitation_strategy_factory"), + -1); + + Auto_Destroyer <URL_Iterator> ihs (vs->make_header_iterator ()); + if (*ihs == 0) + ACE_ERROR_RETURN ((LM_ERROR, + "%p\n", + "make_header_iterator"), + -1); + Auto_Destroyer <URL_Processing_Strategy> phs (vs->make_header_strategy (**ihs)); + if (*phs == 0) + ACE_ERROR_RETURN ((LM_ERROR, + "%p\n", + "make_header_strategy"), + -1); + int phs_result = phs->execute (); + if (phs_result == -1) + ACE_DEBUG ((LM_DEBUG, + "Invalid ")); + + ACE_DEBUG ((LM_DEBUG, + "URL with status %d %s\n", + http_url.reply_status ().status (), + http_url.url_addr().addr_to_string (0))); + + // Store the http url in the cache. + if (this->url_cache ().bind (http_url.url_addr (), + http_url.reply_status ()) != 0) + ACE_ERROR_RETURN ((LM_ERROR, + "%p\n","url_cache.bind"), + -1); + + // ACE_DEBUG ((LM_DEBUG, "header_processing result %d\n", phs_result)); + // Since it is invalid dont go further. + if (phs_result == -1) + return 0; + + // Get back if the recurse option isnt set. 
+ if (OPTIONS::instance ()->recurse () != 1) + return 0; + + Auto_Destroyer <URL_Iterator> is (vs->make_body_iterator ()); + if (*is == 0) + ACE_ERROR_RETURN ((LM_ERROR, + "%p\n", + "make_body_iterator"), + -1); + + Auto_Destroyer <URL_Processing_Strategy> ps (vs->make_body_strategy (**is)); + if (*ps == 0) + ACE_ERROR_RETURN ((LM_ERROR, + "%p\n", + "make_body_strategy"), + -1); + + if (ps->execute () == -1) + ACE_ERROR_RETURN ((LM_ERROR, + "%p\n", + "body execute"), + -1); + // http_url.stream().svc_handler ()->idle ();//KIRTHIKA + } + return 0; +} + +int +URL_Download_Visitation_Strategy_Factory::destroy (void) +{ + // Commit suicide. + delete this; + return 0; +} + +URL_Iterator * +URL_Download_Visitation_Strategy_Factory::make_header_iterator (void) +{ + return 0; +} + +URL_Iterator * +URL_Download_Visitation_Strategy_Factory::make_body_iterator (void) +{ + URL_Iterator *i; + ACE_NEW_RETURN (i, + URL_Download_Iterator (*this->url_), + 0); + return i; +} + +URL_Processing_Strategy * +URL_Download_Visitation_Strategy_Factory::make_header_strategy (URL_Iterator &iterator) +{ + // You fill in here. 
+ return 0; +} + +URL_Processing_Strategy * +URL_Download_Visitation_Strategy_Factory::make_body_strategy (URL_Iterator &iterator) +{ + URL_Processing_Strategy *ps; + ACE_NEW_RETURN (ps, + URL_Download_Strategy (*this->url_, + iterator), + 0); + return ps; +} + +URL_Visitation_Strategy_Factory::URL_Visitation_Strategy_Factory (URL *url) + : url_ (url) +{ +} + +URL_Download_Visitation_Strategy_Factory::URL_Download_Visitation_Strategy_Factory (URL *url) + : URL_Visitation_Strategy_Factory (url) +{ +} + +URL_Validation_Visitation_Strategy_Factory::URL_Validation_Visitation_Strategy_Factory (URL *url, + URL_Validation_Visitor &visitor_context) + : URL_Visitation_Strategy_Factory (url), + visitor_context_ (visitor_context) +{ +} + +URL_Visitation_Strategy_Factory * +URL_Download_Visitor::make_visitation_strategy_factory (URL &url) +{ + // See if we can get connected and send the GET request via the + // <HTTP_URL>. + while (1) //KIRTHIKA + { + int retval = url.send_request (); + if (retval != -1) + break; + + /* ACE_ERROR_RETURN ((LM_ERROR, + "%p\n", + "send_request"), + 0);*/ + } + // @@ Here's where we could check to see if the <url> was HTTP or + // FTP, etc. But for now we'll just assume that everything is an + // HTTP URL. + // else + // { + URL_Visitation_Strategy_Factory *vs; + ACE_NEW_RETURN (vs, + URL_Download_Visitation_Strategy_Factory (&url), + 0); + return vs; + // } +} + +int +URL_Download_Visitor::destroy (void) +{ + // Commit suicide. 
+ delete this; + return 0; +} + +int +URL_Download_Visitor::visit (HTTP_URL &http_url) +{ + Auto_Destroyer <URL_Visitation_Strategy_Factory> vs (this->make_visitation_strategy_factory (http_url)); + + if (*vs == 0) + ACE_ERROR_RETURN ((LM_ERROR, + "%p\n", + "make_visitation_strategy_factory"), + -1); + + Auto_Destroyer <URL_Iterator> is (vs->make_body_iterator ()); + if (*is == 0) + ACE_ERROR_RETURN ((LM_ERROR, + "%p\n", + "make_body_iterator"), + -1); + + Auto_Destroyer <URL_Processing_Strategy> ps (vs->make_body_strategy (**is)); + if (*ps == 0) + ACE_ERROR_RETURN ((LM_ERROR, + "%p\n", + "make_body_strategy"), + -1); + + if (ps->execute () == -1) + ACE_ERROR_RETURN ((LM_ERROR, + "%p\n", + "body execute"), + -1); + return 0; +} + +#if defined (ACE_HAS_EXPLICIT_TEMPLATE_INSTANTIATION) +template class ACE_Hash_Map_Manager<ACE_URL_Addr, URL_Status, ACE_Null_Mutex>; +template class ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH>; +template class ACE_Strategy_Connector<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH>, ACE_SOCK_CONNECTOR>; +template class ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>; +template class ACE_NOOP_Creation_Strategy<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> >; +template class ACE_NOOP_Concurrency_Strategy<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> >; +template class ACE_Hash_Map_Manager_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Pair<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, int>, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex>; +template class ACE_Hash_Map_Iterator_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Pair<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, int>, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex>; +template class 
ACE_Hash_Map_Reverse_Iterator_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Pair<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, int>, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex>; +template class ACE_LRU_Caching_Strategy<ACE_Hash_Map_Manager_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Pair<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, int>, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex> >; +template class ACE_Cached_Connect_Strategy_Ex<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH>,ACE_SOCK_CONNECTOR, ACE_LRU_Caching_Strategy<ACE_Hash_Map_Manager_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Pair<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, int>, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex> >, ACE_SYNCH_NULL_MUTEX >; +template class ACE_Creation_Strategy<ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH> >; +template class ACE_Concurrency_Strategy<ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH> >; +template class ACE_Hash_Map_Manager_Ex<ACE_URL_Addr, URL_Status, ACE_Hash<ACE_URL_Addr>, ACE_Equal_To<ACE_URL_Addr>, ACE_Null_Mutex>; +template class ACE_Hash_Map_Entry<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>, ACE_Pair<ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH> *, int> >; +template class ACE_Connect_Strategy<ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH>, ACE_SOCK_Connector>; +template class ACE_Hash_Map_Iterator_Base_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>, ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH> *, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex>; +template class 
ACE_Optimal_Cache_Map_Manager<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_LRU_Caching_Strategy<ACE_Hash_Map_Manager_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Pair<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, int>, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex> > >; +template class ACE_Optimal_Cache_Map_Reverse_Iterator<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_LRU_Caching_Strategy<ACE_Hash_Map_Manager_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Pair<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, int>, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex> > >; +template class ACE_Optimal_Cache_Map_Iterator<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_LRU_Caching_Strategy<ACE_Hash_Map_Manager_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Pair<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, int>, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex> > >; +template class ACE_Hash_Map_Entry<ACE_URL_Addr, URL_Status>; +template class ACE_Hash_Map_Iterator_Base_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>, ACE_Pair<ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH> *, int>, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, 
ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex>; +template class ACE_Recycling_Strategy<ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH> > ; +template class ACE_Connector<ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH>, ACE_SOCK_Connector>; +template class ACE_Map_Manager<int, ACE_Svc_Tuple<ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH> > *, ACE_RW_Thread_Mutex>; +template class ACE_Svc_Tuple<ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH> >; +template class ACE_Hash_Map_Iterator_Base_Ex<ACE_URL_Addr, URL_Status, ACE_Hash<ACE_URL_Addr>, ACE_Equal_To<ACE_URL_Addr>, ACE_Null_Mutex>; +#elif defined (ACE_HAS_TEMPLATE_INSTANTIATION_PRAGMA) +#pragma instantiate ACE_Hash_Map_Manager<ACE_URL_Addr, URL_Status, ACE_Null_Mutex> +#pragma instantiate ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> +#pragma instantiate ACE_Strategy_Connector<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH>, ACE_SOCK_CONNECTOR> +#pragma instantiate ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> +#pragma instantiate ACE_NOOP_Creation_Strategy<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> > +#pragma instantiate ACE_NOOP_Concurrency_Strategy<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> > +#pragma instantiate ACE_Hash_Map_Manager_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Pair<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *,int>, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex> +#pragma instantiate ACE_Hash_Map_Iterator_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Pair<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, int>, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex> +#pragma instantiate ACE_Hash_Map_Reverse_Iterator_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Pair<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, int>, 
ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex> +#pragma instantiate ACE_LRU_Caching_Strategy<ACE_Hash_Map_Manager_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Pair<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, int>, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex> > +#pragma instantiate ACE_Cached_Connect_Strategy_Ex<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH>,ACE_SOCK_CONNECTOR, ACE_LRU_Caching_Strategy<ACE_Hash_Map_Manager_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Pair<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, int>, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex> >, ACE_SYNCH_NULL_MUTEX > +#pragma instantiate ACE_Creation_Strategy<ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH> >; +#pragma instantiate ACE_Concurrency_Strategy<ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH> >; +#pragma instantiate ACE_Hash_Map_Manager_Ex<ACE_URL_Addr, URL_Status, ACE_Hash<ACE_URL_Addr>, ACE_Equal_To<ACE_URL_Addr>, ACE_Null_Mutex>; +#pragma instantiate ACE_Hash_Map_Entry<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>, ACE_Pair<ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH> *, int> >; +#pragma instantiate ACE_Connect_Strategy<ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH>, ACE_SOCK_Connector>; +#pragma instantiate ACE_Hash_Map_Iterator_Base_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>, ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH> *, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex>; +#pragma instantiate ACE_Optimal_Cache_Map_Manager<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, 
ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_LRU_Caching_Strategy<ACE_Hash_Map_Manager_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Pair<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, int>, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex> > > +#pragma instantiate ACE_Optimal_Cache_Map_Reverse_Iterator<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_LRU_Caching_Strategy<ACE_Hash_Map_Manager_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Pair<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, int>, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex> > > +#pragma instantiate ACE_Optimal_Cache_Map_Iterator<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_LRU_Caching_Strategy<ACE_Hash_Map_Manager_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>,ACE_Pair<ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> *, int>, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex> > > +#pragma instantiate ACE_Hash_Map_Entry<ACE_URL_Addr, URL_Status> +#pragma instantiate ACE_Hash_Map_Iterator_Base_Ex<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>, ACE_Pair<ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH> *, int>, ACE_Hash<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Equal_To<ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> >, ACE_Null_Mutex> +#pragma instantiate ACE_Recycling_Strategy<ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH> > +#pragma instantiate 
ACE_Connector<ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH>, ACE_SOCK_Connector> +#pragma instantiate ACE_Map_Manager<int, ACE_Svc_Tuple<ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH> > *, ACE_RW_Thread_Mutex> +#pragma instantiate ACE_Svc_Tuple<ACE_Svc_Handler<ACE_SOCK_Stream, ACE_NULL_SYNCH> > +#pragma instantiate ACE_Hash_Map_Iterator_Base_Ex<ACE_URL_Addr, URL_Status, ACE_Hash<ACE_URL_Addr>, ACE_Equal_To<ACE_URL_Addr>, ACE_Null_Mutex> +#endif /* ACE_HAS_EXPLICIT_TEMPLATE_INSTANTIATION */ diff --git a/examples/Web_Crawler/URL_Visitor.h b/examples/Web_Crawler/URL_Visitor.h new file mode 100644 index 00000000000..2ce1d6ec78c --- /dev/null +++ b/examples/Web_Crawler/URL_Visitor.h @@ -0,0 +1,372 @@ +/* -*- C++ -*- */ +// $Id$ + +// ============================================================================ +// +// = LIBRARY +// apps/Web +// +// = FILENAME +// URL_Visitor.h +// +// = AUTHOR +// Douglas C. Schmidt <schmidt@cs.wustl.edu> +// +// ============================================================================ + +#ifndef _URL_VISITOR_H +#define _URL_VISITOR_H + +#if !defined (ACE_LACKS_PRAGMA_ONCE) +# pragma once +#endif /* ACE_LACKS_PRAGMA_ONCE */ + +#include "ace/Strategies_T.h" +#include "ace/Synch.h" +#include "ace/Hash_Map_Manager.h" +#include "HTTP_URL.h" +#include "Iterators.h" +#include "ace/Hash_Map_Manager_T.h" +#include "Caching_Strategies_T.h" +#include "Cached_Connect_Strategy_T.h" +#include "Options.h" + +// Forward declarations. +class URL_Validation_Visitor; + +class URL_Processing_Strategy +{ + // = TITLE + // Abstract base class for the URL processing strategy. + // + // = DESCRIPTION +public: + URL_Processing_Strategy (URL &, + URL_Iterator &); + // Constructor. + + virtual ~URL_Processing_Strategy (void) {} + // Virtual destructor. <destroy> deletes <this> through this base + // class, so the destructor must be virtual for subclasses to be + // destroyed correctly. + + virtual int execute (void) = 0; + // Perform the strategy. + + virtual int destroy (void); + // Close down the resources. + +protected: + URL &url_; + // A reference to the URL "context" that we're processing. 
+ + URL_Iterator &iterator_; + // Iterator for the URL that we're processing. +}; + +class HTTP_Header_Processing_Strategy : public URL_Processing_Strategy +{ + // = TITLE + // Defines the HTTP header processing strategy. + // + // = DESCRIPTION +public: + HTTP_Header_Processing_Strategy (URL &, + URL_Iterator &); + // Constructor. + + virtual int execute (void); + // Perform the strategy for processing an HTTP header. +}; + +class HTML_Body_Validation_Strategy : public URL_Processing_Strategy +{ + // = TITLE + // Defines the HTML body processing strategy. + // + // = DESCRIPTION + // This class iterates through the body of an HTML file and + // recursively visits embedded links. +public: + HTML_Body_Validation_Strategy (URL &, + URL_Iterator &, + URL_Validation_Visitor &); + // Constructor. + + virtual int execute (void); + // Perform the strategy for processing an HTML file. This strategy + // iterates over the HTML file and recursively visits embedded links + // to process them, as well. + +private: + URL_Validation_Visitor &visitor_context_; + // This is the context of the visit. +}; + +class URL_Download_Strategy : public URL_Processing_Strategy +{ + // = TITLE + // Defines a URL downloading strategy. + // + // = DESCRIPTION + // This class downloads a URL's contents into a temporary file. +public: + URL_Download_Strategy (URL &, + URL_Iterator &); + // Constructor. + + virtual int execute (void); + // Perform the strategy for downloading a URL to a temporary file. +}; + +class URL_Visitation_Strategy_Factory +{ + // = TITLE + // Abstract Factory for the URL visitation strategy. + // + // = DESCRIPTION +public: + URL_Visitation_Strategy_Factory (URL *); + + // = Factory Methods. + virtual URL_Iterator *make_header_iterator (void) = 0; + // Factory Method that makes the header iterator. + + virtual URL_Iterator *make_body_iterator (void) = 0; + // Factory Method that makes the body iterator. 
+ + virtual URL_Processing_Strategy *make_header_strategy (URL_Iterator &) = 0; + // Factory Method that makes the header processing strategy. + + virtual URL_Processing_Strategy *make_body_strategy (URL_Iterator &) = 0; + // Factory Method that makes the body processing strategy. + + virtual int destroy (void) = 0; + // Close down the resources. + +protected: + URL *url_; + // Stash the URL so we don't have to pass it around. +}; + +class URL_Download_Visitation_Strategy_Factory : public URL_Visitation_Strategy_Factory +{ + // = TITLE + // Concrete Factory for the URL download visitation strategy. + // + // = DESCRIPTION + // Defines every Factory Method declared by + // <URL_Visitation_Strategy_Factory>. +public: + URL_Download_Visitation_Strategy_Factory (URL *); + // Constructor. + + // = Factory Methods. + virtual URL_Iterator *make_header_iterator (void); + // Factory Method that makes an <HTTP_Header_Iterator>. + + virtual URL_Iterator *make_body_iterator (void); + // Factory Method that makes an <HTML_Body_Iterator>. + + virtual URL_Processing_Strategy *make_header_strategy (URL_Iterator &); + // Factory Method that makes the header processing strategy. + + virtual URL_Processing_Strategy *make_body_strategy (URL_Iterator &); + // Factory Method that makes the body processing strategy. + + virtual int destroy (void); + // Close down the resources. +}; + +class URL_Validation_Visitation_Strategy_Factory : public URL_Visitation_Strategy_Factory +{ + // = TITLE + // Concrete Factory for the URL validation visitation strategy. + // + // = DESCRIPTION + // In addition to the URL, this factory is constructed with the + // <URL_Validation_Visitor> that provides the context of the visit. +public: + URL_Validation_Visitation_Strategy_Factory (URL *, + URL_Validation_Visitor &); + // Constructor. + + // = Factory Methods. + virtual URL_Iterator *make_header_iterator (void); + // Factory Method that makes an <HTTP_Header_Iterator>. + + virtual URL_Iterator *make_body_iterator (void); + // Factory Method that makes an <HTML_Body_Iterator>. + + virtual URL_Processing_Strategy *make_header_strategy (URL_Iterator &); + // Factory Method that makes the header processing strategy. 
+ + virtual URL_Processing_Strategy *make_body_strategy (URL_Iterator &); + // Factory Method that makes the body processing strategy. + + virtual int destroy (void); + // Close down the resources. + +private: + URL_Validation_Visitor &visitor_context_; + // Context of the visitor. +}; + +class URL_Visitor +{ + // = TITLE + // Base class for the URL Visitor. + // + // = DESCRIPTION + // This class plays the "visitor" role in the Visitor pattern. +public: + virtual int visit (HTTP_URL &http_url) = 0; + // Visit an <HTTP_URL>. + + // @@ + // virtual int visit (FTP_URL &http_url) = 0; + + virtual int destroy (void) = 0; + // Cleanup the resources. + +protected: + virtual URL_Visitation_Strategy_Factory *make_visitation_strategy_factory (URL &) = 0; + // Make the appropriate <URL_Visitation_Strategy_Factory>. +}; + +class URL_Validation_Visitor : public URL_Visitor +{ + // = TITLE + // Subclass that defines the URL validation visitor. + // + // = DESCRIPTION + // This class checks to make sure that the <HTTP_URL> is valid. + // If the <HTTP_URL> is an <HTML> file, it can also be used to + // recursively check that all embedded links in this file are + // valid. +public: + typedef ACE_Hash_Map_Manager <ACE_URL_Addr, URL_Status, ACE_Null_Mutex> + URL_CACHE; + // Maps an <ACE_URL_Addr> to its <URL_Status>. + + virtual int visit (HTTP_URL &http_url); + // Visit an <HTTP_URL> to make sure that it's valid. If the content + // type of the <HTTP_URL> is "text/html" and the <recursion> option + // is enabled then <visit> recursively checks each link embedded in + // the HTML page. + + // @@ + // virtual int visit (FTP_URL &http_url); + + URL_Validation_Visitor (void); + // Constructor. + + virtual int destroy (void); + // Cleanup the resources. + + URL_CACHE &url_cache (void); + // Returns a reference to the URL cache. 
+ + typedef ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_NULL_SYNCH> Svc_Handler; + + typedef ACE_Strategy_Connector<Svc_Handler, + ACE_SOCK_CONNECTOR> + STRAT_CONNECTOR; + typedef ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr> + REFCOUNTED_HASH_RECYCLABLE_ADDRESS; + typedef ACE_NOOP_Creation_Strategy<Svc_Handler> + NULL_CREATION_STRATEGY; + typedef ACE_NOOP_Concurrency_Strategy<Svc_Handler> + NULL_ACTIVATION_STRATEGY; + + typedef ACE_Hash_Map_Manager_Ex<REFCOUNTED_HASH_RECYCLABLE_ADDRESS,\ + ACE_Pair<Svc_Handler *, int>,\ + ACE_Hash<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>, \ + ACE_Equal_To<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>,\ + ACE_Null_Mutex> + CONNECTION_HASH_MAP; + typedef ACE_Hash_Map_Iterator_Ex<REFCOUNTED_HASH_RECYCLABLE_ADDRESS,\ + ACE_Pair<Svc_Handler *, int>,\ + ACE_Hash<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>, \ + ACE_Equal_To<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>,\ + ACE_Null_Mutex> + CONNECTION_HASH_MAP_ITERATOR; + typedef ACE_Hash_Map_Reverse_Iterator_Ex<REFCOUNTED_HASH_RECYCLABLE_ADDRESS,\ + ACE_Pair<Svc_Handler *, int>,\ + ACE_Hash<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>, \ + ACE_Equal_To<REFCOUNTED_HASH_RECYCLABLE_ADDRESS>,\ + ACE_Null_Mutex> + CONNECTION_HASH_MAP_REVERSE_ITERATOR; + typedef ACE_LRU_Caching_Strategy<CONNECTION_HASH_MAP> + LRU; + typedef ACE_Cached_Connect_Strategy_Ex<Svc_Handler,ACE_SOCK_CONNECTOR, LRU, ACE_SYNCH_NULL_MUTEX> + CACHED_CONNECT_STRATEGY; + +protected: + virtual ~URL_Validation_Visitor (void); + virtual URL_Visitation_Strategy_Factory *make_visitation_strategy_factory (URL &); + // Factory Method that makes a + // <URL_Validation_Visitation_Strategy_Factory>. + // + // NOTE(review): the destructor declared just before this method is + // protected, so clients cannot delete a visitor directly; + // presumably <destroy> is the release path -- confirm. + // + // The CONNECTION_HASH_MAP, CONNECTION_HASH_MAP_ITERATOR and + // CONNECTION_HASH_MAP_REVERSE_ITERATOR typedefs all use + // REFCOUNTED_HASH_RECYCLABLE_ADDRESS as the key and + // ACE_Pair<Svc_Handler *, int> as the value. + + URL_CACHE url_cache_; + // Cache the status of URLs we've already validated. + + int in_cache (const ACE_URL_Addr &url_addr); + // Check to see if the reply status of this <url_addr> is in the + // cache. Returns 1 if so, 0 if not. 
+ + NULL_CREATION_STRATEGY creation_strategy_; + NULL_ACTIVATION_STRATEGY activation_strategy_; + + // Configure the Strategy Connector with a strategy that caches + // connections. + CACHED_CONNECT_STRATEGY caching_connect_strategy_; + + STRAT_CONNECTOR *strat_connector_; + // Pointer to the Strategy Connector. +}; + +class URL_Download_Visitor : public URL_Visitor +{ + // = TITLE + // Subclass that defines the URL download visitor. + // + // = DESCRIPTION + // This class checks to make sure that the <HTTP_URL> is valid. + // + // NOTE(review): this description and the comment on <visit> + // read as if they were copied from <URL_Validation_Visitor>; + // confirm them against the implementation. +public: + virtual int visit (HTTP_URL &http_url); + // Visit an <HTTP_URL> to make sure that it's valid. If the content + // type of the <HTTP_URL> is "text/html" and the <recursion> option + // is enabled then <visit> recursively checks each link embedded in + // the HTML page. + + // @@ + // virtual int visit (FTP_URL &http_url); + + virtual int destroy (void); + // Cleanup the resources. + +protected: + URL_Visitation_Strategy_Factory *make_visitation_strategy_factory (URL &); + // Factory Method that makes a <URL_Download_Visitation_Strategy_Factory>. +}; + +template <class T> +class Auto_Destroyer +{ + // = TITLE + // Simple class that ensures the <destroy> method is called on our + // <URL_*> objects when they go out of scope. + // + // = DESCRIPTION + // This class is similar to an auto_ptr<> and should be used to + // simplify blocks of code that must create/destroy pointers to + // various <URL_*> related strategies and iterators. 
+public: + Auto_Destroyer (T *t): t_ (t) {} + T *operator-> (void) { return this->t_; } + T *operator *(void) { return this->t_; } + void operator= (T *t) + { + if (this->t_ != 0) + this->t_->destroy (); + this->t_ = t; + } + ~Auto_Destroyer (void) + { + if (this->t_ != 0) + t_->destroy (); + } +private: + T *t_; +}; + + +#endif /* _URL_VISITOR_H */ diff --git a/examples/Web_Crawler/URL_Visitor_Factory.cpp b/examples/Web_Crawler/URL_Visitor_Factory.cpp new file mode 100644 index 00000000000..2e695431fda --- /dev/null +++ b/examples/Web_Crawler/URL_Visitor_Factory.cpp @@ -0,0 +1,46 @@ +/* -*- C++ -*- */ +// $Id$ + +#include "URL_Visitor_Factory.h" + +ACE_RCSID(HTTP_1.1_Client, URL_Visitor_Factory, "$Id$") + +URL_Visitor * +URL_Validation_Visitor_Factory::make_visitor (void) +{ + URL_Visitor *v; + + ACE_NEW_RETURN (v, + URL_Validation_Visitor, + 0); + + return v; +} + +Command_Processor * +URL_Validation_Visitor_Factory::make_command_processor (void) +{ + Command_Processor *cp; + + ACE_NEW_RETURN (cp, + Command_Processor, + 0); + return cp; +} + +URL_Visitor * +URL_Download_Visitor_Factory::make_visitor (void) +{ + URL_Visitor *v; + + ACE_NEW_RETURN (v, + URL_Download_Visitor, + 0); + return v; +} + +Command_Processor * +URL_Download_Visitor_Factory::make_command_processor (void) +{ + return 0; +} diff --git a/examples/Web_Crawler/URL_Visitor_Factory.h b/examples/Web_Crawler/URL_Visitor_Factory.h new file mode 100644 index 00000000000..f1fc27d5bd4 --- /dev/null +++ b/examples/Web_Crawler/URL_Visitor_Factory.h @@ -0,0 +1,70 @@ +/* -*- C++ -*- */ +// $Id$ + +// ============================================================================ +// +// = LIBRARY +// apps/Web +// +// = FILENAME +// URL_Visitor_Factory.h +// +// = AUTHOR +// Douglas C. 
Schmidt <schmidt@cs.wustl.edu> +// +// ============================================================================ + +#ifndef _URL_VISITOR_FACTORY_H +#define _URL_VISITOR_FACTORY_H + +#include "URL_Visitor.h" +#include "Command_Processor.h" + +// NOTE(review): the guard below defines ACE_LACKS_PRAGMA_ONCE instead +// of issuing a pragma once directive -- confirm this is intended. +#if !defined (ACE_LACKS_PRAGMA_ONCE) +#define ACE_LACKS_PRAGMA_ONCE +#endif /* ACE_LACKS_PRAGMA_ONCE */ + +class URL_Visitor_Factory +{ + // = TITLE + // Abstract base class that creates URL visitors. + // + // = DESCRIPTION + // Subclasses define each of the Factory Methods to + // make the right objects, which all "vary" together. +public: + virtual ~URL_Visitor_Factory (void) {} + // Virtual destructor. <Web_Crawler> deletes its concrete factory + // through a <URL_Visitor_Factory> pointer, which is only well + // defined when the base class destructor is virtual. + + virtual URL_Visitor *make_visitor (void) = 0; + // Factory Method that makes the appropriate type of <URL_Visitor>. + + virtual Command_Processor *make_command_processor (void) = 0; + // Factory Method that makes the appropriate type of + // <Command_Processor>. +}; + +class URL_Validation_Visitor_Factory : public URL_Visitor_Factory +{ + // = TITLE + // Create a URL visitor that validates URL links. +public: + virtual URL_Visitor *make_visitor (void); + // Factory Method that makes a <URL_Validation_Visitor>. Returns 0 + // if the allocation fails. + + virtual Command_Processor *make_command_processor (void); + // Factory Method that makes a <Command_Processor>. Returns 0 if + // the allocation fails. + + +}; + +class URL_Download_Visitor_Factory : public URL_Visitor_Factory +{ + // = TITLE + // Create a URL visitor that downloads URL links. +public: + virtual URL_Visitor *make_visitor (void); + // Factory Method that makes a <URL_Download_Visitor>. Returns 0 if + // the allocation fails. + + virtual Command_Processor *make_command_processor (void); + // Currently makes no <Command_Processor>: always returns 0. 
+}; + +#endif /* _URL_VISITOR_FACTORY_H */ diff --git a/examples/Web_Crawler/Web_Crawler.cpp b/examples/Web_Crawler/Web_Crawler.cpp new file mode 100644 index 00000000000..4697ea8cbbc --- /dev/null +++ b/examples/Web_Crawler/Web_Crawler.cpp @@ -0,0 +1,95 @@ +// $Id$ + +#include "Options.h" +#include "URL_Visitor_Factory.h" +#include "Web_Crawler.h" + +ACE_RCSID(HTTP_1.1_Client, Web_Crawler, "$Id$") + +Web_Crawler::~Web_Crawler (void) +{ + // Release the factory allocated by <open>; deleting 0 is harmless. + delete this->url_visitor_factory_; +} + +Web_Crawler::Web_Crawler (void) + : url_visitor_factory_ (0) +{ + // No factory exists until <open> succeeds. +} + +int +Web_Crawler::open (int argc, char *argv[]) +{ + // Returns -1 if the options cannot be parsed or the factory cannot + // be allocated, 0 otherwise. + if (OPTIONS::instance ()->parse_args (argc, argv) == -1) + return -1; + // @@ Put the ACE_Service_Config::open() stuff here somewhere... + else + { + // For now just hardcode this to create "validation" visitors. + ACE_NEW_RETURN (this->url_visitor_factory_, + URL_Validation_Visitor_Factory, + -1); + return 0; + } +} + +int +Web_Crawler::run (void) +{ + // The factory only exists after a successful <open>; without it + // there is nothing to make the visitor and command processor from. + if (this->url_visitor_factory_ == 0) + ACE_ERROR_RETURN ((LM_ERROR, + "Web_Crawler::run called before a successful open\n"), + -1); + + // Make the appropriate <URL_Visitor>. + Auto_Destroyer<URL_Visitor> visitor (this->url_visitor_factory_->make_visitor ()); + + if (*visitor == 0) + ACE_ERROR_RETURN ((LM_ERROR, + "%p\n", + "make_visitor"), + -1); + + // Make the appropriate <Command_Processor>. + Auto_Destroyer<Command_Processor> cp (this->url_visitor_factory_->make_command_processor ()); + + if (*cp == 0) + ACE_ERROR_RETURN ((LM_ERROR, + "%p\n", + "make_command_processor"), + -1); + + // Set the <Command_Processor> in the <Options> to make it visible. + OPTIONS::instance ()->command_processor (*cp); + + // Set the <URL_Visitor> in the <Options> to make it visible. + OPTIONS::instance ()->visitor (*visitor); + + // @@ You fill in here... 
+ ACE_URL_Addr *url_addr; + // Every allocation failure below returns -1, because 0 is the + // success value of <run>. + ACE_NEW_RETURN (url_addr, + ACE_URL_Addr (OPTIONS::instance()->hostname (), + OPTIONS::instance()->path_name (), + OPTIONS::instance()->port_no ()), //KIRTHIKA + -1); + Auto_Destroyer<ACE_URL_Addr> url_addr_ptr (url_addr); + + HTTP_URL *http_url; + ACE_NEW_RETURN (http_url, + HTTP_URL (**url_addr_ptr), + -1); + + Auto_Destroyer<HTTP_URL> http_url_ptr (http_url); + + URL_Command *url_command; + ACE_NEW_RETURN (url_command, + URL_Command (*http_url_ptr), + -1); + // Auto_Destroyer<URL_Command> url_command_ptr (url_command); + // NOTE(review): <url_command> is not released here if <insert> + // fails; presumably the <Command_Processor> owns it once inserted + // -- confirm. + + if (cp->insert (url_command) != 0) + ACE_ERROR_RETURN ((LM_ERROR, + "%p\n", "insert"), + -1); + + if (cp->execute () != 0) + ACE_ERROR_RETURN ((LM_ERROR, + "%p\n", "execute"), + -1); + return 0; +} + diff --git a/examples/Web_Crawler/Web_Crawler.h b/examples/Web_Crawler/Web_Crawler.h new file mode 100644 index 00000000000..e2590f986ca --- /dev/null +++ b/examples/Web_Crawler/Web_Crawler.h @@ -0,0 +1,62 @@ +/* -*- C++ -*- */ +// $Id$ + +// ============================================================================ +// +// = LIBRARY +// apps/Web +// +// = FILENAME +// Web_Crawler.h +// +// = AUTHOR +// Douglas C. Schmidt <schmidt@cs.wustl.edu> +// +// ============================================================================ + +#ifndef _WEB_CRAWLER_H +#define _WEB_CRAWLER_H + +#include "URL_Addr.h" +#include "HTTP_URL.h" + +// NOTE(review): the guard below defines ACE_LACKS_PRAGMA_ONCE instead +// of issuing a pragma once directive -- confirm this is intended. +#if !defined (ACE_LACKS_PRAGMA_ONCE) +#define ACE_LACKS_PRAGMA_ONCE +#endif /* ACE_LACKS_PRAGMA_ONCE */ + +// Forward declaration. +class URL_Visitor_Factory; + +class Web_Crawler +{ + // = TITLE + // An abstraction for a Web Crawler. + // + // = DESCRIPTION + // This class is a Facade that organizes the other classes in the + // solution, which include a factory that creates a visitor, + // which in turn embodies the appropriate visitation strategy. +public: + // = Initialization and termination methods. + Web_Crawler (void); + // Constructor. + + ~Web_Crawler (void); + // Destructor. 
+ + int open (int argc, char *argv[]); + // Parses the command-line options and initializes the + // <URL_Visitor_Factory>. Returns -1 on failure and 0 on success. + + int run (void); + // Runs the Web Crawler and carries out whatever visitation strategy + // is configured. Returns -1 on failure and 0 on success. + +private: + URL_Visitor_Factory *url_visitor_factory_; + // Pointer to a factory that creates visitors that explore URLs and + // perform various tasks. Subclasses of <URL_Visitor_Factory> + // determine what happens during a visitation. +}; + +#endif /* _WEB_CRAWLER_H */ diff --git a/examples/Web_Crawler/main.cpp b/examples/Web_Crawler/main.cpp new file mode 100644 index 00000000000..3a349333e01 --- /dev/null +++ b/examples/Web_Crawler/main.cpp @@ -0,0 +1,49 @@ +// $Id$ + +// ============================================================================ +// +// = LIBRARY +// apps/Web +// +// = FILENAME +// main.cpp +// +// = DESCRIPTION +// This program implements a Web crawler that can be configured to +// apply various strategies to URLs that it visits. +// +// = AUTHOR +// Doug Schmidt <schmidt@cs.wustl.edu> +// +// ============================================================================ + +#include "Web_Crawler.h" +#include "Options.h" +#include "ace/Signal.h" + +ACE_RCSID(HTTP_1.1_Client, main, "$Id$") + +// Signal handler: logs a message and then aborts the process. +void sig_handler (int) +{ + ACE_DEBUG ((LM_DEBUG, + "aborting!\n")); + ACE_OS::abort (); +} + +// Program entry point. The exit status is 1 if either <open> or <run> +// returns -1, and 0 otherwise. +int +main (int argc, char *argv[]) +{ + // Associate <sig_handler> with SIGFPE. + ACE_Sig_Action sa ((ACE_SignalHandler) sig_handler, SIGFPE); + + Web_Crawler crawler; + + if (crawler.open (argc, argv) == -1) + return 1; + else if (crawler.run () == -1) + return 1; + else + return 0; +} + + + |