247 lines
7.4 KiB
C#
247 lines
7.4 KiB
C#
// /**
|
|
// * File: Crawler.cs
|
|
// * Author: haraldwolff
|
|
// *
|
|
// * This file and its content are copyrighted by the Author and / or copyright holder.
|
|
// * Any use without proper permission is illegal and may lead to legal actions.
|
|
// *
|
|
// *
|
|
// **/
|
|
using System;
|
|
using ln.types.threads;
|
|
using System.IO;
|
|
using ln.logging;
|
|
using System.Linq;
|
|
using System.Threading;
|
|
using Newtonsoft.Json;
|
|
using Newtonsoft.Json.Converters;
|
|
using ln.types.odb;
|
|
using ln.skyscanner.crawl.service;
|
|
using ln.skyscanner.crawl.tests;
|
|
using ln.types.net;
|
|
|
|
namespace ln.skyscanner.crawl
|
|
{
|
|
/// <summary>
/// Dispatches crawl jobs for hosts and subnets onto a worker pool and runs a
/// background scheduler thread that periodically re-queues entries that are due.
/// </summary>
public class Crawler
{
    /// <summary>Worker count the pool is sized to by <see cref="Start"/>.</summary>
    private const int CrawlPoolSize = 64;

    /// <summary>Pause between two scheduler passes, in milliseconds.</summary>
    private const int SchedulerIntervalMs = 5000;

    /// <summary>Interval at which the pausing scheduler re-checks the stop flag, in milliseconds.</summary>
    private const int SchedulerPollMs = 250;

    /// <summary>
    /// Registers the crawl services once per process. Registration order is kept as-is.
    /// </summary>
    static Crawler()
    {
        // NOTE(review): presumably the TCP ports to probe — confirm against the TCP service.
        CrawlService.RegisterService(new TCP(new int[] { 13080, 13022, 80, 22, 443, 13443 }));

        // NOTE(review): these look like SNMP community strings (i.e. secrets) embedded in
        // source; consider loading them from configuration instead.
        CrawlService.RegisterService(new SNMP(new string[] { "VhclfC7lfIojYZ", "Vhclf(C7$lfIojYZ", "ByFR4oW98hap", "qVy3hnZJ2fov" }));

        CrawlService.RegisterService(new RFC1213());
        CrawlService.RegisterService(new HTTP());
        CrawlService.RegisterService(new SSH());
        CrawlService.RegisterService(new Ubiquity());
    }

    /// <summary>The owning application instance passed to the constructor.</summary>
    public SkyScanner SkyScanner { get; }

    /// <summary>Working directory of the crawler ("crawler" below the application's base path).</summary>
    public String BasePath { get; set; }

    /// <summary>The "pool" directory below <see cref="BasePath"/>.</summary>
    public string PoolPath => Path.Combine(BasePath, "pool");

    /// <summary>Path of the file "crawler.db" inside <see cref="PoolPath"/>.</summary>
    public String DBFileName => Path.Combine(PoolPath, "crawler.db");

    // Written by Start()/Stop() and polled by the scheduler thread; volatile so the
    // scheduler loop is guaranteed to observe the stop request.
    volatile bool stopping;

    // May be set to null by Stop() after an abort; Start() recreates it.
    Pool crawlThreadPool = new Pool(0);

    /// <summary>Jobs currently reported by the pool; empty when the pool has been discarded.</summary>
    public PoolJob[] CurrentJobs => crawlThreadPool?.CurrentPoolJobs ?? new PoolJob[0];

    /// <summary>Jobs reported as queued by the pool; empty when the pool has been discarded.</summary>
    public PoolJob[] QueuedJobs => crawlThreadPool?.QueuedJobs ?? new PoolJob[0];

    /// <summary>When true, the scheduler queues crawls for subnets that are due.</summary>
    public bool CrawlSubnets { get; set; }

    /// <summary>When true, the scheduler queues crawls for hosts that are due.</summary>
    public bool CrawlHosts { get; set; }

    /// <summary>Lifecycle state of the crawler.</summary>
    [JsonConverter(typeof(StringEnumConverter))]
    public ComponentState CrawlerState { get; private set; }

    Thread threadScheduler;

    /// <summary>
    /// Creates the crawler and makes sure its working directories exist.
    /// </summary>
    /// <param name="skyScanner">Owning application; its BasePath is the parent of the crawler directory.</param>
    /// <remarks>
    /// If initialization fails, <see cref="Stop"/> is invoked and the original exception is rethrown.
    /// </remarks>
    public Crawler(SkyScanner skyScanner)
    {
        SkyScanner = skyScanner;
        try
        {
            BasePath = Path.Combine(skyScanner.BasePath, "crawler");

            // CreateDirectory is a no-op for directories that already exist,
            // so no separate existence check is needed.
            Directory.CreateDirectory(BasePath);
            Directory.CreateDirectory(PoolPath);

            CrawlerState = ComponentState.INITIALIZED;
        }
        catch (Exception)
        {
            Stop();

            throw;
        }
    }

    /// <summary>
    /// Sizes the worker pool, starts the scheduler thread if none is running and
    /// marks the crawler as started. Does nothing while a stop is in progress.
    /// </summary>
    public void Start()
    {
        if (CrawlerState == ComponentState.STOPPING)
        {
            return;
        }

        stopping = false;

        if (crawlThreadPool == null)
        {
            crawlThreadPool = new Pool(CrawlPoolSize);
        }
        else
        {
            crawlThreadPool.SetPoolSize(CrawlPoolSize);
        }

        if (threadScheduler == null)
        {
            threadScheduler = new Thread(scheduler);
            threadScheduler.Start();
        }

        CrawlerState = ComponentState.STARTED;
    }

    /// <summary>
    /// Stops the crawler. The first call signals the scheduler, waits for it to exit and
    /// closes the pool. A call made while a stop is already in progress aborts the pool
    /// instead and discards it once it reports no remaining workers.
    /// </summary>
    /// <remarks>
    /// Exceptions are logged and leave the crawler in the FAILED state; they are not rethrown.
    /// </remarks>
    public void Stop()
    {
        try
        {
            if (CrawlerState == ComponentState.STOPPING)
            {
                // Work on a local copy so a concurrent change of the field cannot
                // turn the calls below into a null dereference.
                Pool pool = crawlThreadPool;
                if (pool != null)
                {
                    pool.Abort();

                    if (pool.CurrentPoolSize == 0)
                    {
                        crawlThreadPool = null;
                    }
                }
            }
            else
            {
                CrawlerState = ComponentState.STOPPING;

                stopping = true;

                Thread schedulerThread = threadScheduler;
                if (schedulerThread != null)
                {
                    schedulerThread.Join();
                    threadScheduler = null;
                }

                // The pool may have been discarded by an earlier abort.
                crawlThreadPool?.Close();

                stopping = false;
                CrawlerState = ComponentState.STOPPED;
            }
        }
        catch (Exception e)
        {
            Logging.Log(e);
            CrawlerState = ComponentState.FAILED;
        }
    }

    /// <summary>
    /// Makes sure a subnet entry exists for <paramref name="network"/>, creating it if needed.
    /// </summary>
    /// <param name="network">Network to look up or register.</param>
    public void EnsureSubnet(Network4 network)
    {
        // FindSubnet takes the instance lock itself.
        FindSubnet(network);
    }

    /// <summary>Queues an arbitrary job on the crawl pool.</summary>
    /// <param name="job">Job to run on a pool worker.</param>
    public void Enqueue(JobDelegate job)
    {
        crawlThreadPool.Enqueue(job);
    }

    /// <summary>Looks up the host with the given id and queues a crawl for it.</summary>
    /// <param name="hostID">Identifier used to index the crawled-hosts collection.</param>
    public void Crawl(Guid hostID)
    {
        Crawl(SkyScanner.Instance.Entities.CrawledHosts[hostID]);
    }

    /// <summary>Queues a crawl job for a single host.</summary>
    /// <param name="crawledHost">Host to crawl.</param>
    public void Crawl(CrawledHost crawledHost)
    {
        crawlThreadPool.Enqueue(new Crawl(this, crawledHost));
    }

    /// <summary>Queues a crawl job for a subnet.</summary>
    /// <param name="subnet">Subnet to crawl.</param>
    public void Crawl(CrawledSubnet subnet)
    {
        crawlThreadPool.Enqueue(new CrawlNetwork(this, subnet));
    }

    /// <summary>
    /// Returns the stored host that lists <paramref name="ip"/> among its addresses or as its
    /// primary address. If none is found, a new host named after the address is inserted and returned.
    /// </summary>
    /// <param name="ip">Address to look up.</param>
    /// <returns>The existing or newly inserted host.</returns>
    public CrawledHost FindHostForIP(IPv4 ip)
    {
        // NOTE(review): locking on `this` is kept because code outside this file may
        // synchronize on the crawler instance — confirm before switching to a private lock object.
        lock (this)
        {
            Query nodeByIpQuery = Query.OR(
                Query.Equals<CrawledHost>("IPAddresses[]", ip),
                Query.Equals<CrawledHost>("PrimaryIP", ip)
            );

            CrawledHost knownHost = SkyScanner.Instance.Entities.CrawledHosts.Query(nodeByIpQuery).FirstOrDefault();
            if (knownHost != null)
            {
                return knownHost;
            }

            CrawledHost newHost = new CrawledHost();
            newHost.PrimaryIP = ip;
            newHost.Name = ip.ToString();

            SkyScanner.Instance.Entities.CrawledHosts.Insert(newHost);
            return newHost;
        }
    }

    /// <summary>
    /// Returns the stored subnet for <paramref name="network"/>. If none is found, a new one is
    /// inserted; crawling is disabled on it when a blocked network contains the requested network.
    /// </summary>
    /// <param name="network">Network to look up.</param>
    /// <returns>The existing or newly inserted subnet.</returns>
    public CrawledSubnet FindSubnet(Network4 network)
    {
        // NOTE(review): locking on `this` is kept because code outside this file may
        // synchronize on the crawler instance — confirm before switching to a private lock object.
        lock (this)
        {
            Query subnetQuery = Query.Equals<CrawledSubnet>("Network", network);
            CrawledSubnet knownSubnet = SkyScanner.Instance.Entities.CrawledSubnets.Query(subnetQuery).FirstOrDefault();
            if (knownSubnet != null)
            {
                return knownSubnet;
            }

            Logging.Log(LogLevel.INFO, "Crawler adds new subnet: {0}", network);
            CrawledSubnet newSubnet = new CrawledSubnet(network);

            foreach (Network4 blockedNetwork in SkyScanner.Instance.Entities.BlockedNetworks)
            {
                if (blockedNetwork.Contains(network))
                {
                    newSubnet.DisableCrawling = true;
                    break; // one match is enough; further matches cannot change the flag
                }
            }

            SkyScanner.Instance.Entities.CrawledSubnets.Insert(newSubnet);
            return newSubnet;
        }
    }

    /// <summary>
    /// Body of the scheduler thread: until a stop is requested, updates the global network,
    /// queues crawls for hosts and subnets that are due, then pauses before the next pass.
    /// </summary>
    private void scheduler()
    {
        while (!stopping)
        {
            try
            {
                SkyScanner.Entities.GlobalNetwork.Update();

                if (CrawlHosts)
                {
                    foreach (CrawledHost crawledHost in SkyScanner.Instance.Entities.CrawledHosts)
                    {
                        if (crawledHost.NextCheck < DateTime.Now)
                        {
                            Crawl(crawledHost);
                        }
                    }
                }

                if (CrawlSubnets)
                {
                    foreach (CrawledSubnet subnet in SkyScanner.Instance.Entities.CrawledSubnets)
                    {
                        // NOTE(review): Width >= 24 presumably restricts scans to /24 or smaller networks — confirm.
                        if (!subnet.DisableCrawling && (subnet.NextScan < DateTime.Now) && (subnet.Network.Width >= 24))
                        {
                            Crawl(subnet);
                        }
                    }
                }
            }
            catch (Exception e)
            {
                // A failing pass must not terminate the scheduler thread; log it and
                // try again on the next pass.
                Logging.Log(e);
            }

            // Pause in short slices so that Stop() does not have to wait for the
            // whole interval before Join() returns.
            for (int waited = 0; waited < SchedulerIntervalMs && !stopping; waited += SchedulerPollMs)
            {
                Thread.Sleep(SchedulerPollMs);
            }
        }
    }
}
|
|
} |