ln.skyscanner/crawl/Crawler.cs

251 lines
7.2 KiB
C#

// /**
// * File: Crawler.cs
// * Author: haraldwolff
// *
// * This file and its content are copyrighted by the Author and / or copyright holder.
// * Any use without proper permission is illegal and may lead to legal action.
// *
// *
// **/
using System;
using ln.types.threads;
using System.Net;
using System.Collections.Generic;
using System.IO;
using ln.logging;
using ln.types;
using System.Linq;
using ln.types.serialize;
using ln.skyscanner.entities;
using System.Net.NetworkInformation;
using ln.snmp;
using ln.snmp.endpoint;
using ln.perfdb;
using ln.perfdb.storage;
using ln.skyscanner.check;
using System.Threading;
using ln.snmp.types;
using Newtonsoft.Json;
using Newtonsoft.Json.Converters;
using ln.types.odb;
using ln.skyscanner.crawl.service;
using ln.skyscanner.crawl.tests;
using LiteDB;
namespace ln.skyscanner.crawl
{
public class Crawler
{
/// <summary>
/// Registers the crawl services with <see cref="CrawlService"/> when the type is
/// first used: a TCP service, an SNMP service and an RFC1213 service, in that order.
/// </summary>
static Crawler()
{
    // NOTE(review): presumably the TCP ports probed per host - confirm against the TCP service.
    int[] tcpPorts = { 13080, 13022, 80, 22, 443, 13443 };

    // NOTE(review): these look like SNMP community strings, i.e. credentials kept in
    // source - confirm, and consider loading them from configuration instead.
    string[] snmpStrings = { "VhclfC7lfIojYZ", "Vhclf(C7$lfIojYZ", "ByFR4oW98hap", "qVy3hnZJ2fov" };

    CrawlService.RegisterService(new TCP(tcpPorts));
    CrawlService.RegisterService(new SNMP(snmpStrings));
    CrawlService.RegisterService(new RFC1213());
}
/// <summary>The SkyScanner instance passed to the constructor.</summary>
public SkyScanner SkyScanner { get; }

/// <summary>Base directory of the crawler; the constructor sets it to "crawler" below the SkyScanner base path.</summary>
public string BasePath { get; set; }

/// <summary><see cref="BasePath"/> combined with "perfdb".</summary>
public string PerfPath => Path.Combine(BasePath, "perfdb");

/// <summary><see cref="BasePath"/> combined with "pool".</summary>
public string PoolPath => Path.Combine(BasePath, "pool");

/// <summary><see cref="PoolPath"/> combined with "crawler.db".</summary>
public string DBFileName => Path.Combine(PoolPath, "crawler.db");

// Written by Stop()/Start() and polled by the scheduler thread, hence volatile
// so the scheduler loop always observes the latest value.
private volatile bool stopping;

// May be set to null by Stop() after an abort; Start() re-creates it in that case.
private Pool crawlThreadPool = new Pool(0);

/// <summary>
/// Jobs reported by the pool as current. Returns an empty array while no pool
/// exists (after an aborted stop) instead of throwing.
/// </summary>
public PoolJob[] CurrentJobs => crawlThreadPool?.CurrentPoolJobs ?? new PoolJob[0];

/// <summary>
/// Jobs reported by the pool as queued. Returns an empty array while no pool
/// exists (after an aborted stop) instead of throwing.
/// </summary>
public PoolJob[] QueuedJobs => crawlThreadPool?.QueuedJobs ?? new PoolJob[0];

/// <summary>When true, the scheduler enqueues crawls for subnets that are due.</summary>
public bool CrawlSubnets { get; set; }

/// <summary>When true, the scheduler enqueues crawls for hosts that are due.</summary>
public bool CrawlHosts { get; set; }

/// <summary>Lifecycle state of the crawler; serialized by name rather than by number.</summary>
[JsonConverter(typeof(StringEnumConverter))]
public ComponentState CrawlerState { get; private set; }

// Thread running scheduler(); null while no scheduler is running.
private Thread threadScheduler;

/// <summary>Collection of all known hosts.</summary>
public ODBCollection<CrawledHost> CrawledHosts => hosts;

/// <summary>Collection of all known subnets.</summary>
public ODBCollection<CrawledSubnet> CrawledSubnets => subnets;

// Object database opened on BasePath by the constructor, and the two collections taken from it.
private ODB odbDatabase;
private ODBCollection<CrawledHost> hosts;
private ODBCollection<CrawledSubnet> subnets;
/// <summary>
/// Creates the crawler for the given SkyScanner instance: derives the base path,
/// makes sure the base, perfdb and pool directories exist, opens the object
/// database on the base path and fetches the host and subnet collections.
/// </summary>
/// <param name="skyScanner">Owning instance; its BasePath is the parent of the crawler directory.</param>
/// <remarks>
/// If any step fails, <see cref="Stop"/> is called and the original exception is rethrown.
/// </remarks>
public Crawler(SkyScanner skyScanner)
{
    SkyScanner = skyScanner;

    try
    {
        BasePath = Path.Combine(skyScanner.BasePath, "crawler");

        // Same order as before: base directory first, then its two sub-directories.
        foreach (string directory in new string[] { BasePath, PerfPath, PoolPath })
        {
            if (!Directory.Exists(directory))
                Directory.CreateDirectory(directory);
        }

        odbDatabase = new ODB(BasePath);
        hosts = odbDatabase.GetCollection<CrawledHost>();
        subnets = odbDatabase.GetCollection<CrawledSubnet>();

        CrawlerState = ComponentState.INITIALIZED;
    }
    catch (Exception)
    {
        Stop();
        throw;
    }
}
/// <summary>
/// Starts crawling: clears the stop flag, brings the job pool to its working size
/// (creating it if it does not exist), starts the scheduler thread if none is
/// running and sets the state to STARTED.
/// Does nothing while the crawler is in the STOPPING state.
/// </summary>
public void Start()
{
    const int workerCount = 64;

    if (CrawlerState == ComponentState.STOPPING)
        return;

    stopping = false;

    if (crawlThreadPool != null)
        crawlThreadPool.SetPoolSize(workerCount);
    else
        crawlThreadPool = new Pool(workerCount);

    if (threadScheduler == null)
    {
        threadScheduler = new Thread(scheduler);
        threadScheduler.Start();
    }

    CrawlerState = ComponentState.STARTED;
}
/// <summary>
/// Stops the crawler.
/// <para>
/// Normal case: sets the state to STOPPING, signals the scheduler thread, waits for
/// it to end, closes the job pool (if one exists) and sets the state to STOPPED.
/// </para>
/// <para>
/// If a stop is already in progress (state is STOPPING), the pool is aborted
/// instead, and the pool reference is dropped once it reports a size of zero.
/// </para>
/// Exceptions are logged and leave the crawler in the FAILED state; they are not
/// propagated to the caller.
/// </summary>
public void Stop()
{
    try
    {
        if (CrawlerState == ComponentState.STOPPING)
        {
            // Work on one snapshot of the field so a concurrent change cannot
            // slip in between the null check and the calls below.
            Pool abortingPool = crawlThreadPool;
            if (abortingPool != null)
            {
                abortingPool.Abort();
                if (abortingPool.CurrentPoolSize == 0)
                    crawlThreadPool = null;
            }
            return;
        }

        CrawlerState = ComponentState.STOPPING;
        stopping = true;

        if (threadScheduler != null)
        {
            threadScheduler.Join();
            threadScheduler = null;
        }

        // The pool may have been dropped by an earlier abort (see above); in that
        // case there is nothing left to close and the stop must still succeed.
        crawlThreadPool?.Close();

        stopping = false;
        CrawlerState = ComponentState.STOPPED;
    }
    catch (Exception e)
    {
        Logging.Log(e);
        CrawlerState = ComponentState.FAILED;
    }
}
/// <summary>
/// Makes sure a subnet entry exists for the given network by delegating to
/// <see cref="FindSubnet"/> and discarding the result.
/// </summary>
/// <param name="network">Network to look up or register.</param>
public void EnsureSubnet(CIDR network) => FindSubnet(network);
/// <summary>Hands the given job delegate to the crawl job pool.</summary>
/// <param name="job">Delegate to enqueue.</param>
public void Enqueue(JobDelegate job) => crawlThreadPool.Enqueue(job);
/// <summary>
/// Looks up the host with the given ID in <see cref="CrawledHosts"/> and enqueues a crawl for it.
/// </summary>
/// <param name="hostID">ID used to index the host collection.</param>
// The named argument pins the call to the CrawledHost overload.
public void Crawl(Guid hostID) => Crawl(crawledHost: CrawledHosts[hostID]);
/// <summary>Creates a crawl job for the given host and enqueues it on the job pool.</summary>
/// <param name="crawledHost">Host the new job is created for.</param>
public void Crawl(CrawledHost crawledHost)
{
    var hostJob = new Crawl(this, crawledHost);
    crawlThreadPool.Enqueue(hostJob);
}
/// <summary>Creates a network crawl job for the given subnet and enqueues it on the job pool.</summary>
/// <param name="subnet">Subnet the new job is created for.</param>
public void Crawl(CrawledSubnet subnet)
{
    var networkJob = new CrawlNetwork(this, subnet);
    crawlThreadPool.Enqueue(networkJob);
}
/// <summary>
/// Returns the first known host for which HasIP(ip) is true. If there is none, a
/// new host is created with <paramref name="ip"/> as its primary IP and the IP's
/// string form as its name, inserted into <see cref="CrawledHosts"/> and returned.
/// </summary>
/// <param name="ip">Address to look up.</param>
/// <returns>The existing or newly inserted host; never null.</returns>
public CrawledHost FindHostForIP(CIDR ip)
{
    CrawledHost known = CrawledHosts.Where(candidate => candidate.HasIP(ip)).FirstOrDefault();
    if (known != null)
        return known;

    CrawledHost created = new CrawledHost
    {
        PrimaryIP = ip,
        Name = ip.ToString()
    };
    CrawledHosts.Insert(created);
    return created;
}
/// <summary>
/// Returns the first known subnet whose Network equals <paramref name="network"/>.
/// If there is none, a new subnet is created for the network, inserted into
/// <see cref="CrawledSubnets"/> and returned.
/// </summary>
/// <param name="network">Network to look up.</param>
/// <returns>The existing or newly inserted subnet; never null.</returns>
public CrawledSubnet FindSubnet(CIDR network)
{
    CrawledSubnet known = CrawledSubnets.Where(candidate => candidate.Network.Equals(network)).FirstOrDefault();
    if (known != null)
        return known;

    CrawledSubnet created = new CrawledSubnet(network);
    CrawledSubnets.Insert(created);
    return created;
}
/// <summary>
/// Body of the scheduler thread. Until a stop is requested it repeatedly
/// updates the global network entity, enqueues a crawl for every host whose
/// NextCheck lies in the past (if <see cref="CrawlHosts"/> is set), enqueues a
/// crawl for every subnet with a mask width of at least 24 whose NextScan is
/// more than one day in the past (if <see cref="CrawlSubnets"/> is set), and
/// then pauses for about five seconds.
/// </summary>
/// <remarks>
/// A failure inside one pass is logged and the loop continues with the next pass,
/// so a single faulty pass does not end scheduling.
/// </remarks>
private void scheduler()
{
    const int pauseMilliseconds = 5000;
    const int sliceMilliseconds = 250;

    while (!stopping)
    {
        try
        {
            SkyScanner.Entities.GlobalNetwork.Update();

            if (CrawlHosts)
            {
                // One reference time per pass instead of one clock read per host.
                DateTime now = DateTime.Now;
                foreach (CrawledHost crawledHost in CrawledHosts.Where(host => host.NextCheck < now))
                {
                    Crawl(crawledHost);
                }
            }

            if (CrawlSubnets)
            {
                DateTime scanDeadline = DateTime.Now - TimeSpan.FromDays(1);
                foreach (CrawledSubnet subnet in CrawledSubnets.Where(sn => sn.NextScan < scanDeadline && sn.Network.MaskWidth >= 24))
                {
                    Crawl(subnet);
                }
            }
        }
        catch (Exception e)
        {
            // Keep the scheduler alive; the next pass retries.
            Logging.Log(e);
        }

        // Pause in short slices so that Stop(), which joins this thread, does not
        // have to wait for the full pause to elapse.
        for (int waited = 0; waited < pauseMilliseconds && !stopping; waited += sliceMilliseconds)
        {
            Thread.Sleep(sliceMilliseconds);
        }
    }
}
}
}