ln.skyscanner/crawl/Crawler.cs

279 lines
8.5 KiB
C#

// /**
// * File: Crawler.cs
// * Author: haraldwolff
// *
// * This file and its content are copyrighted by the Author and / or copyright holder.
// * Any use without proper permission is illegal and may lead to legal action.
// *
// *
// **/
using System;
using ln.types.threads;
using System.IO;
using ln.logging;
using System.Linq;
using System.Threading;
using ln.types.odb;
using ln.skyscanner.crawl.service;
using ln.skyscanner.crawl.tests;
using ln.types.net;
using ln.skyscanner.entities;
namespace ln.skyscanner.crawl
{
/// <summary>
/// Coordinates crawling of hosts and subnets. Owns the thread pool that executes
/// crawl jobs and a scheduler thread that periodically enqueues every known host
/// or subnet whose next check/scan time has passed.
/// </summary>
public class Crawler
{
    // Pool size applied by Start().
    const int CrawlPoolSize = 64;
    // Pause between two scheduler passes, in milliseconds.
    const int SchedulerIntervalMs = 5000;
    // The pause is slept in slices of this length so a stop request is noticed quickly.
    const int SchedulerPollMs = 100;

    /// <summary>
    /// Registers the crawl service implementations with <see cref="CrawlService"/>.
    /// </summary>
    static Crawler()
    {
        // NOTE(review): the integers are presumably TCP ports and the strings presumably
        // SNMP community strings - confirm against the TCP / SNMP service classes.
        // SECURITY NOTE(review): secrets are embedded in source code here; consider
        // loading them from protected configuration instead.
        CrawlService.RegisterService(new TCP(new int[] { 13080, 13022, 80, 22, 443, 13443 }));
        CrawlService.RegisterService(new SNMP(new string[] { "VhclfC7lfIojYZ", "Vhclf(C7$lfIojYZ", "ByFR4oW98hap", "qVy3hnZJ2fov" }));
        CrawlService.RegisterService(new RFC1213());
        CrawlService.RegisterService(new HTTP());
        CrawlService.RegisterService(new SSH());
        CrawlService.RegisterService(new Ubiquity());
    }

    /// <summary>The application instance this crawler was created for.</summary>
    public SkyScanner SkyScanner { get; }

    /// <summary>Directory of the crawler ("crawler" below the application's base path).</summary>
    public String BasePath { get; set; }

    /// <summary>The "pool" directory below <see cref="BasePath"/>.</summary>
    public string PoolPath => Path.Combine(BasePath, "pool");

    /// <summary>Path of the file "crawler.db" inside <see cref="PoolPath"/>.</summary>
    public String DBFileName => Path.Combine(PoolPath, "crawler.db");

    // Set by Stop() and polled by the scheduler thread. Must be volatile: without it the
    // JIT may hoist the read out of the scheduler loop and the stop request is never seen.
    volatile bool stopping;

    // May become null after an aborting Stop(); Start() re-creates it in that case.
    Pool crawlThreadPool = new Pool(0);

    /// <summary>Jobs the crawl pool reports as currently running.</summary>
    public PoolJob[] CurrentJobs => crawlThreadPool.CurrentPoolJobs;

    /// <summary>Jobs the crawl pool reports as queued.</summary>
    public PoolJob[] QueuedJobs => crawlThreadPool.QueuedJobs;

    /// <summary>When true, the scheduler enqueues subnets that are due for a scan.</summary>
    public bool CrawlSubnets { get; set; }

    /// <summary>When true, the scheduler enqueues hosts that are due for a check.</summary>
    public bool CrawlHosts { get; set; }

    /// <summary>Lifecycle state of this crawler.</summary>
    public ComponentState CrawlerState { get; private set; }

    /// <summary>Generator holding the user names and passwords added in the constructor.</summary>
    public CredentialsGenerator Credentials { get; } = new CredentialsGenerator();

    Thread threadScheduler;

    /// <summary>
    /// Creates the crawler, makes sure its working directories exist and fills
    /// <see cref="Credentials"/>. On any failure the crawler is stopped and the
    /// exception is rethrown.
    /// </summary>
    /// <param name="skyScanner">Owning application instance; supplies the base path.</param>
    public Crawler(SkyScanner skyScanner)
    {
        SkyScanner = skyScanner;
        try
        {
            BasePath = Path.Combine(skyScanner.BasePath, "crawler");

            // Directory.CreateDirectory does nothing for directories that already exist,
            // so no preceding Directory.Exists check is required.
            Directory.CreateDirectory(BasePath);
            Directory.CreateDirectory(PoolPath);

            CrawlerState = ComponentState.INITIALIZED;

            // SECURITY NOTE(review): passwords are embedded in source code; consider moving
            // them to protected configuration. The list also contains duplicated entries
            // ("67E3xpTcMbwR", "v1kXbeCux0Td") - kept as-is because it is not visible here
            // how CredentialsGenerator treats duplicates.
            Credentials
                .AddPasswords(new string[]{
                    "MNX3oTzhp9am",
                    "f1whWdj5E2Mo",
                    "f1whWdj5",
                    "0Sl71eGw",
                    "0Sl71eGwVdjI6WeW",
                    "67E3xpTc",
                    "67E3xpTcMbwR",
                    "v1kXbeCux0Td",
                    "v1kXbeCu",
                    "YNZRtVUFH94b",
                    "67E3xpTcMbwR",
                    "v1kXbeCux0Td",
                    "DVqxof1JQ9at"
                })
                .AddUserNames(new string[] { "skytron", "admin", "root" })
                ;
        }
        catch (Exception)
        {
            Stop();
            throw;
        }
    }

    /// <summary>
    /// Starts (or restarts) crawling: sizes the crawl pool, starts the scheduler thread
    /// if none exists and switches to <c>STARTED</c>. Does nothing while a
    /// <see cref="Stop"/> is in progress (state <c>STOPPING</c>).
    /// </summary>
    public void Start()
    {
        if (CrawlerState != ComponentState.STOPPING)
        {
            stopping = false;

            //if (dbCrawlPool == null)
            //dbCrawlPool = new ODB<CrawlPool>(PoolPath);

            // The pool field is null after an aborting Stop() dropped it.
            if (crawlThreadPool == null)
                crawlThreadPool = new Pool(CrawlPoolSize);
            else
                crawlThreadPool.SetPoolSize(CrawlPoolSize);

            if (threadScheduler == null)
            {
                threadScheduler = new Thread(scheduler);
                threadScheduler.Start();
            }

            CrawlerState = ComponentState.STARTED;

            //if (SkyScanner.Instance.OptionCrawlVendor != null)
            //{
            //    foreach (Node node in SkyScanner.Instance.Entities.NodeCollection.Query("Vendor", SkyScanner.Instance.OptionCrawlVendor))
            //    {
            //        CrawledHost crawledHost = FindHostForIP(node.PrimaryIP);
            //        Crawl(crawledHost);
            //    }
            //}
        }
    }

    /// <summary>
    /// Stops crawling. The first call requests the scheduler to end, waits for it and
    /// closes the crawl pool. A call made while the state is already <c>STOPPING</c>
    /// aborts the pool instead and drops it once it reports a pool size of zero.
    /// Exceptions are logged and leave the crawler in state <c>FAILED</c>.
    /// </summary>
    public void Stop()
    {
        try
        {
            if (CrawlerState == ComponentState.STOPPING)
            {
                Pool pool = crawlThreadPool;
                if (pool != null)
                {
                    pool.Abort();
                    if (pool.CurrentPoolSize == 0)
                        crawlThreadPool = null;
                }
            }
            else
            {
                CrawlerState = ComponentState.STOPPING;
                stopping = true;

                if (threadScheduler != null)
                {
                    threadScheduler.Join();
                    threadScheduler = null;
                }

                // The pool may already have been dropped by an aborting Stop(); closing
                // unconditionally would throw and wrongly report FAILED.
                crawlThreadPool?.Close();

                stopping = false;
                CrawlerState = ComponentState.STOPPED;
            }
        }
        catch (Exception e)
        {
            Logging.Log(e);
            CrawlerState = ComponentState.FAILED;
        }
    }

    /// <summary>
    /// Makes sure a <see cref="CrawledSubnet"/> entry exists for the given network.
    /// </summary>
    /// <param name="network">Network to look up or register.</param>
    public void EnsureSubnet(Network4 network)
    {
        // NOTE(review): lock (this) is kept because other project code may synchronize
        // on the crawler instance; a private lock object would be preferable if not.
        lock (this)
        {
            FindSubnet(network);
        }
    }

    /// <summary>Enqueues an arbitrary job delegate on the crawl pool.</summary>
    /// <param name="job">Job to enqueue.</param>
    public void Enqueue(JobDelegate job)
    {
        crawlThreadPool.Enqueue(job);
    }

    /// <summary>Enqueues a crawl of the host stored under the given id.</summary>
    /// <param name="hostID">Identifier used to look the host up in the CrawledHosts collection.</param>
    public void Crawl(Guid hostID)
    {
        CrawledHost crawledHost = SkyScanner.Instance.Entities.CrawledHosts[hostID];
        Crawl(crawledHost);
    }

    /// <summary>Enqueues a crawl job for the given host.</summary>
    /// <param name="crawledHost">Host to crawl.</param>
    public void Crawl(CrawledHost crawledHost)
    {
        Crawl crawl = new Crawl(this, crawledHost);
        crawlThreadPool.Enqueue(crawl);
    }

    /// <summary>Enqueues a network crawl job for the given subnet.</summary>
    /// <param name="subnet">Subnet to crawl.</param>
    public void Crawl(CrawledSubnet subnet)
    {
        CrawlNetwork crawlSubnet = new CrawlNetwork(this, subnet);
        crawlThreadPool.Enqueue(crawlSubnet);
    }

    /// <summary>
    /// Returns the host whose primary IP or IP address list matches <paramref name="ip"/>.
    /// If none is stored yet, a new host named after the IP is created and inserted.
    /// </summary>
    /// <param name="ip">Address to look up.</param>
    /// <returns>The existing or newly inserted host.</returns>
    public CrawledHost FindHostForIP(IPv4 ip)
    {
        lock (this)
        {
            Query nodeByIpQuery = Query.OR(
                Query.Equals<CrawledHost>("IPAddresses[]", ip),
                Query.Equals<CrawledHost>("PrimaryIP", ip)
            );

            CrawledHost crawledHost = SkyScanner.Instance.Entities.CrawledHosts.Query(nodeByIpQuery).FirstOrDefault();
            if (crawledHost == null)
            {
                crawledHost = new CrawledHost();
                crawledHost.PrimaryIP = ip;
                crawledHost.Name = ip.ToString();
                SkyScanner.Instance.Entities.CrawledHosts.Insert(crawledHost);
            }
            return crawledHost;
        }
    }

    /// <summary>
    /// Returns the stored subnet for <paramref name="network"/>. If none is stored yet,
    /// a new one is created and inserted, with crawling disabled when the network is
    /// contained in one of the blocked networks.
    /// </summary>
    /// <param name="network">Network to look up.</param>
    /// <returns>The existing or newly inserted subnet.</returns>
    public CrawledSubnet FindSubnet(Network4 network)
    {
        lock (this)
        {
            Query subnetQuery = Query.Equals<CrawledSubnet>("Network", network);
            CrawledSubnet sn = SkyScanner.Instance.Entities.CrawledSubnets.Query(subnetQuery).FirstOrDefault();
            if (sn == null)
            {
                Logging.Log(LogLevel.INFO, "Crawler adds new subnet: {0}", network);
                sn = new CrawledSubnet(network);

                foreach (Network4 blockedNetwork in SkyScanner.Instance.Entities.BlockedNetworks)
                {
                    if (blockedNetwork.Contains(network))
                    {
                        sn.DisableCrawling = true;
                        // One containing blocked network is sufficient.
                        break;
                    }
                }

                SkyScanner.Instance.Entities.CrawledSubnets.Insert(sn);
            }
            return sn;
        }
    }

    // Body of the scheduler thread. Until a stop is requested it repeatedly updates the
    // global network, enqueues due hosts and subnets (when enabled) and then pauses.
    private void scheduler()
    {
        while (!stopping)
        {
            try
            {
                SkyScanner.Entities.GlobalNetwork.Update();

                if (CrawlHosts)
                {
                    foreach (CrawledHost crawledHost in SkyScanner.Instance.Entities.CrawledHosts)
                    {
                        if (crawledHost.NextCheck < DateTime.Now)
                            Crawl(crawledHost);
                    }
                }

                if (CrawlSubnets)
                {
                    foreach (CrawledSubnet subnet in SkyScanner.Instance.Entities.CrawledSubnets)
                    {
                        // NOTE(review): Width is presumably the prefix length, i.e. only
                        // networks of /24 or smaller are scanned - confirm against Network4.
                        if (!subnet.DisableCrawling && (subnet.NextScan < DateTime.Now) && (subnet.Network.Width >= 24))
                            Crawl(subnet);
                    }
                }
            }
            catch (Exception e)
            {
                // A failing pass must not end the scheduler thread through an unhandled
                // exception; log it and retry on the next pass.
                Logging.Log(e);
            }

            // Sleep in short slices so Stop() does not have to wait for the full interval.
            for (int waited = 0; waited < SchedulerIntervalMs && !stopping; waited += SchedulerPollMs)
                Thread.Sleep(SchedulerPollMs);
        }
    }
}
}