Modifier | Constructor and Description |
---|---|
protected |
Crawler(CrawlerRule crawlerRule)
构造函数。
注意：此构造方法不会校验规则定义。 |
protected Crawler(CrawlerRule crawlerRule)
crawlerRule
-
public void run()
public void clear()
public static Crawler create(CrawlerRule crawlerRule)
crawlerRule
- 规则定义
public static final SimulatorData testDown(String url, SiteRule siteRule)
url
- 测试网页的地址
siteRule
- 站点规则
public static final SimulatorData testDown(String url, SiteRule siteRule, Downloader downloader)
url
- 测试网页的地址
siteRule
- 站点规则
downloader
- 网页下载器
public static final SimulatorData testContent(String url, SiteRule siteRule, ExtractRule contentExtractRule)
url
- 测试网页的地址
siteRule
- 站点规则
contentExtractRule
- 内容提取规则
public static final SimulatorData testContent(String url, SiteRule siteRule, ExtractRule contentExtractRule, Downloader downloader)
url
- 测试网页的地址
siteRule
- 站点规则
contentExtractRule
- 内容提取规则
downloader
- 网页下载器
public static final SimulatorData testMatcher(String url, SiteRule siteRule, ContentRule content)
url
- 测试目标地址
siteRule
- 站点规则
content
- 内容解析规则
public static final SimulatorData testMatcher(String url, SiteRule siteRule, ContentRule content, Downloader downloader)
url
- 测试目标地址
siteRule
- 站点规则
content
- 内容解析规则
downloader
- 下载器
public static final SimulatorData testLink(String url, SiteRule siteRule, LinkRule linkRule)
url
- 测试目标地址
siteRule
- 站点规则
linkRule
- 链接提取规则
public static final SimulatorData testLink(String url, SiteRule siteRule, LinkRule linkRule, Downloader downloader)
url
- 测试目标地址
siteRule
- 站点规则
linkRule
- 链接提取规则
downloader
- 网页下载器
public boolean isRun()
public CrawlerRule getCrawlerRule()
getCrawlerRule
in interface Task
public Downloader getDownloader()
public Crawler setDownloader(Downloader downloader)
downloader
- 网页下载器
public Scheduler getScheduler()
public CrawlerListener getCrawlerListener()
public Crawler setCrawlerListener(CrawlerListener crawlerListener)
crawlerListener
- 事件监听器
public LinkExtract getLinkExtract()
public Crawler setLinkExtract(LinkExtract linkExtract)
linkExtract
- 链接解析器
public ContentExtract getContentExtract()
public Crawler setContentExtract(ContentExtract contentExtract)
contentExtract
-
public Pipeline getPipeline()
public ThreadPoolExecutor getThreadPool()
public Crawler setThreadPool(ThreadPoolExecutor threadPool)
public RequestCache getRequestCache()
public Crawler setRequestCache(RequestCache requestCache)
requestCache
- 资源缓存器
public StatuObserver getStatuObserver()
public Crawler setStatuObserver(StatuObserver statuObserver)
statuObserver
- 状态监听器
public Crawler setScheduler(Scheduler scheduler)
scheduler
- 资源调度器
public long getAllTaskCount()
getAllTaskCount
in interface Task
public long getExtractedTaskCount()
getExtractedTaskCount
in interface Task
public long getFailTaskCount()
getFailTaskCount
in interface Task
public String getName()
public LocalDateTime getStartTime()
getStartTime
in interface Task
public DuplicateRemover getDuplicateRemover()
public Crawler setDuplicateRemover(DuplicateRemover duplicateRemover)
duplicateRemover
- 请求去重器
public Crawler setExtra(Map<String,Object> map)
map
- 额外信息
public Crawler addExtra(Map<String,Object> map)
map
- 额外信息
public Crawler setExtra(String key, Object value)
key
- 额外信息的键
value
- 额外信息的值
Copyright © 2020 Pivotal Software, Inc. All rights reserved.