// ln.skyscanner/crawl/Crawler.cs
// /**
// * File: Crawler.cs
// * Author: haraldwolff
// *
// * This file and it's content is copyrighted by the Author and / or copyright holder.
// * Any use wihtout proper permission is illegal and may lead to legal actions.
// *
// *
// **/
using System;
using ln.types.threads;
using System.Net;
using System.Collections.Generic;
using System.IO;
using ln.logging;
using ln.types;
using System.Linq;
using ln.types.serialize;
using ln.skyscanner.entities;
using System.Net.NetworkInformation;
using ln.snmp;
using ln.snmp.endpoint;
using ln.perfdb;
using ln.perfdb.storage;
using ln.skyscanner.check;
using System.Threading;
using ln.snmp.types;
using Newtonsoft.Json;
using Newtonsoft.Json.Converters;
using ln.types.odb;
using ln.skyscanner.crawl.service;
using ln.skyscanner.crawl.tests;
using LiteDB;
namespace ln.skyscanner.crawl
{
public class Crawler
{
    /// <summary>
    /// Static initializer: registers the probe services that crawl jobs run
    /// against discovered hosts (TCP port probes, SNMP logins, RFC1213 walk).
    /// </summary>
    // SECURITY NOTE(review): the SNMP community strings below are secrets
    // committed to source control; consider moving them to configuration.
    // Left unchanged here to preserve behavior.
    static Crawler()
    {
        CrawlService.RegisterService(new TCP(new int[] { 13080, 13022, 80, 22, 443, 13443 }));
        CrawlService.RegisterService(new SNMP(new string[] { "VhclfC7lfIojYZ", "Vhclf(C7$lfIojYZ", "ByFR4oW98hap", "qVy3hnZJ2fov" }));
        CrawlService.RegisterService(new RFC1213());
    }

    /// <summary>Owning SkyScanner instance; supplies the base path and entity access.</summary>
    public SkyScanner SkyScanner { get; }

    /// <summary>Root directory for all crawler state.</summary>
    public String BasePath { get; set; }
    /// <summary>Directory holding the performance database.</summary>
    public String PerfPath => Path.Combine(BasePath, "perfdb");
    /// <summary>Directory holding the crawl pool.</summary>
    public string PoolPath => Path.Combine(BasePath, "pool");
    /// <summary>Crawler database file path. NOTE(review): currently unused — the
    /// ODB below is opened at BasePath, not this path; confirm intended location.</summary>
    public String DBFileName => Path.Combine(PoolPath, "crawler.db");

    bool stopping;                          // set by Stop() to end the scheduler loop
    Pool crawlThreadPool = new Pool(0);     // worker pool; resized to 64 on Start()

    public PoolJob[] CurrentJobs => crawlThreadPool.CurrentPoolJobs;
    public PoolJob[] QueuedJobs => crawlThreadPool.QueuedJobs;

    /// <summary>Enables the periodic subnet re-scan in the scheduler loop.</summary>
    public bool CrawlSubnets { get; set; }
    /// <summary>Enables the periodic host re-check in the scheduler loop.</summary>
    public bool CrawlHosts { get; set; }

    [JsonConverter(typeof(StringEnumConverter))]
    public ComponentState CrawlerState { get; private set; }

    Thread threadScheduler;                 // runs scheduler(); null when not started

    public ODBCollection<CrawledHost> CrawledHosts => hosts;
    public ODBCollection<CrawledSubnet> CrawledSubnets => subnets;
    //private ODB<CrawlPool> dbCrawlPool;
    private ODB odbDatabase;
    private ODBCollection<CrawledHost> hosts;
    private ODBCollection<CrawledSubnet> subnets;

    /// <summary>
    /// Creates the crawler working directories beneath the SkyScanner base path
    /// and opens the object database. On any failure the partially constructed
    /// instance is stopped and the original exception is rethrown.
    /// </summary>
    /// <param name="skyScanner">Owning SkyScanner instance.</param>
    public Crawler(SkyScanner skyScanner)
    {
        SkyScanner = skyScanner;
        try
        {
            BasePath = Path.Combine(skyScanner.BasePath, "crawler");

            if (!Directory.Exists(BasePath))
                Directory.CreateDirectory(BasePath);
            if (!Directory.Exists(PerfPath))
                Directory.CreateDirectory(PerfPath);
            if (!Directory.Exists(PoolPath))
                Directory.CreateDirectory(PoolPath);

            //dbCrawlPool = new ODB<CrawlPool>(PoolPath);
            // NOTE(review): database opened at BasePath although DBFileName points
            // into PoolPath — confirm which location is intended.
            odbDatabase = new ODB(BasePath);
            hosts = odbDatabase.GetCollection<CrawledHost>();
            subnets = odbDatabase.GetCollection<CrawledSubnet>();

            CrawlerState = ComponentState.INITIALIZED;
        }
        catch (Exception)
        {
            Stop();
            throw;  // 'throw;' (not 'throw ex;') preserves the stack trace
        }
    }

    /// <summary>
    /// Starts (or resumes) crawling: grows the worker pool to 64 threads and
    /// launches the scheduler thread. Does nothing while a stop is in progress.
    /// </summary>
    public void Start()
    {
        if (CrawlerState != ComponentState.STOPPING)
        {
            stopping = false;

            //if (dbCrawlPool == null)
            //dbCrawlPool = new ODB<CrawlPool>(PoolPath);

            if (crawlThreadPool == null)
                crawlThreadPool = new Pool(64);
            else
                crawlThreadPool.SetPoolSize(64);

            if (threadScheduler == null)
            {
                threadScheduler = new Thread(scheduler);
                threadScheduler.Start();
            }

            CrawlerState = ComponentState.STARTED;
        }
    }

    /// <summary>
    /// Stops the crawler. First call: signals the scheduler thread, joins it and
    /// closes the worker pool. A second call while already STOPPING escalates to
    /// Abort() on the pool. Failures are logged and reflected as FAILED state.
    /// </summary>
    public void Stop()
    {
        try
        {
            if (CrawlerState == ComponentState.STOPPING)
            {
                // Repeated Stop() while stopping: force-abort the workers.
                if (crawlThreadPool != null)
                {
                    crawlThreadPool.Abort();

                    if (crawlThreadPool.CurrentPoolSize == 0)
                        crawlThreadPool = null;
                }
            }
            else
            {
                CrawlerState = ComponentState.STOPPING;

                stopping = true;
                if (threadScheduler != null)
                {
                    threadScheduler.Join();
                    threadScheduler = null;
                }

                // BUGFIX: crawlThreadPool may already be null when a previous
                // abort cycle cleared it; guard against NullReferenceException.
                crawlThreadPool?.Close();

                stopping = false;
                CrawlerState = ComponentState.STOPPED;
            }
        }
        catch (Exception e)
        {
            Logging.Log(e);
            CrawlerState = ComponentState.FAILED;
        }
    }

    /// <summary>Ensures a subnet record exists for the given network.</summary>
    public void EnsureSubnet(CIDR network)
    {
        FindSubnet(network);
    }

    /// <summary>Queues an arbitrary job on the crawler's worker pool.</summary>
    public void Enqueue(JobDelegate job)
    {
        crawlThreadPool.Enqueue(job);
    }

    /// <summary>Queues a crawl of the host identified by its database ID.</summary>
    public void Crawl(Guid hostID)
    {
        CrawledHost crawledHost = CrawledHosts[hostID];
        Crawl(crawledHost);
    }

    /// <summary>Queues a crawl job for the given host.</summary>
    public void Crawl(CrawledHost crawledHost)
    {
        Crawl crawl = new Crawl(this, crawledHost);
        crawlThreadPool.Enqueue(crawl);
    }

    /// <summary>Queues a network scan job for the given subnet.</summary>
    public void Crawl(CrawledSubnet subnet)
    {
        CrawlNetwork crawlSubnet = new CrawlNetwork(this, subnet);
        crawlThreadPool.Enqueue(crawlSubnet);
    }

    /// <summary>
    /// Returns the host record owning the given IP, creating and persisting a
    /// new record (named after the IP) when none exists yet.
    /// </summary>
    public CrawledHost FindHostForIP(CIDR ip)
    {
        CrawledHost crawledHost = CrawledHosts.Where(host => host.HasIP(ip)).FirstOrDefault();
        if (crawledHost == null)
        {
            crawledHost = new CrawledHost();
            crawledHost.PrimaryIP = ip;
            crawledHost.Name = ip.ToString();
            CrawledHosts.Insert(crawledHost);
        }
        return crawledHost;
    }

    /// <summary>
    /// Returns the subnet record for the given network, creating and persisting
    /// a new one when none exists yet.
    /// </summary>
    public CrawledSubnet FindSubnet(CIDR network)
    {
        CrawledSubnet sn = CrawledSubnets.Where(subnet => subnet.Network.Equals(network)).FirstOrDefault();
        if (sn == null)
        {
            Logging.Log(LogLevel.INFO, "Crawler adds new subnet: {0}", network);
            sn = new CrawledSubnet(network);
            CrawledSubnets.Insert(sn);
        }
        return sn;
    }

    /// <summary>
    /// Scheduler loop (runs on threadScheduler): every 5 seconds refreshes the
    /// global network, then queues crawl jobs for hosts whose NextCheck is due
    /// and for subnets (mask width >= 24) not scanned within the last day,
    /// until Stop() sets the 'stopping' flag.
    /// </summary>
    private void scheduler()
    {
        int count = 0;  // iteration counter (currently unused beyond the increment)
        while (!stopping)
        {
            count++;

            SkyScanner.Entities.GlobalNetwork.Update();

            if (CrawlHosts)
                foreach (CrawledHost crawledHost in CrawledHosts.Where(host => (host.NextCheck < DateTime.Now)))
                {
                    Crawl(crawledHost);
                }

            if (CrawlSubnets)
                foreach (CrawledSubnet subnet in CrawledSubnets.Where(sn => (sn.NextScan < (DateTime.Now - TimeSpan.FromDays(1)) && sn.Network.MaskWidth >= 24)))
                {
                    Crawl(subnet);
                }

            Thread.Sleep(5000);
        }
    }
}
}