/**
 * Crawler constructor: sets up per-instance crawl state and configuration.
 *
 * Every configuration field is a plain public property initialised from a
 * module-level default, so callers may overwrite it after construction.
 * Invoke with `new`.
 *
 * @constructor
 */
function Crawler() {
  // Registry of URLs the crawler knows about.
  // NOTE(review): presumably keyed by URL string; populated elsewhere - confirm.
  this.knownUrls = {};

  /*
   * Urls that were reported in the onSuccess or onFailure callbacks.
   * this.crawledUrls is a subset of this.knownUrls, and matches it
   * iff there were no redirects while crawling.
   */
  this.crawledUrls = [];

  // Crawl settings, each starting from its module-level default.
  this.depth = DEFAULT_DEPTH;
  this.ignoreRelative = false;
  this.userAgent = DEFAULT_USERAGENT;
  this.maxConcurrentRequests = DEFAULT_MAX_CONCURRENT_REQUESTS;
  this.maxRequestsPerSecond = DEFAULT_MAX_REQUESTS_PER_SECOND;

  // Default predicate: accepts every url. Replace to filter what gets crawled.
  this.shouldCrawl = function(url) {
    return true;
  };

  // Default predicate: accepts every url. Replace to filter which pages have
  // their links followed.
  this.shouldCrawlLinksFrom = function(url) {
    return true;
  };

  //Urls that are queued for crawling, for some of them HTTP requests may not yet have been issued
  this._currentUrlsToCrawl = [];

  // Counter of in-flight requests; starts at zero.
  this._concurrentRequestNumber = 0;

  //Injecting request as a dependency for unit test support
  this.request = request;
}