ln.skyscanner/crawl/Crawler.cs

275 lines
8.1 KiB
C#
Raw Normal View History

2019-03-13 08:20:53 +01:00
// /**
// * File: Crawler.cs
// * Author: haraldwolff
// *
// * This file and its content are copyrighted by the Author and / or copyright holder.
// * Any use without proper permission is illegal and may lead to legal actions.
// *
// *
// **/
using System;
2019-03-13 14:18:05 +01:00
using ln.types.threads;
2019-03-13 08:20:53 +01:00
using System.Net;
using System.Collections.Generic;
using System.IO;
using ln.logging;
using ln.types;
using System.Linq;
using ln.types.serialize;
using ln.skyscanner.entities;
using System.Net.NetworkInformation;
using ln.snmp;
using ln.snmp.endpoint;
using ln.snmp.rfc1213;
using ln.perfdb;
using ln.perfdb.storage;
using ln.skyscanner.check;
using System.Threading;
2019-03-15 07:43:12 +01:00
using ln.snmp.types;
2019-03-18 08:12:54 +01:00
using Newtonsoft.Json;
using Newtonsoft.Json.Converters;
2019-03-21 14:06:36 +01:00
using ln.types.odb;
2019-03-13 08:20:53 +01:00
namespace ln.skyscanner.crawl
{
2019-03-15 07:43:12 +01:00
public class Crawler
{
    /// <summary>The owning SkyScanner instance this crawler belongs to.</summary>
    public SkyScanner SkyScanner { get; }

    /// <summary>Base directory for all on-disk crawler state.</summary>
    public String BasePath { get; set; }
    /// <summary>Directory holding performance database files.</summary>
    public String PerfPath => Path.Combine(BasePath, "perfdb");
    /// <summary>Directory holding the persisted crawl pool database.</summary>
    public string PoolPath => Path.Combine(BasePath, "pool");

    // Signals the scheduler loop to exit; set/cleared by Stop().
    bool stopping;

    // Worker pool for crawl jobs; created with size 0 and resized on Start().
    Pool crawlThreadPool = new Pool(0);

    /// <summary>Jobs currently executing in the crawl pool.</summary>
    public PoolJob[] CurrentJobs => crawlThreadPool.CurrentPoolJobs;
    /// <summary>Jobs queued but not yet executing in the crawl pool.</summary>
    public PoolJob[] QueuedJobs => crawlThreadPool.QueuedJobs;

    /// <summary>Root of the persisted crawl pool; null until Start() opens the database.</summary>
    public CrawlPool CrawlPool => dbCrawlPool?.Root;
    /// <summary>The underlying object database backing <see cref="CrawlPool"/>.</summary>
    public ODB<CrawlPool> DBCrawlPool => dbCrawlPool;

    [JsonConverter(typeof(StringEnumConverter))]
    public ComponentState CrawlerState { get; private set; }

    // Background thread running scheduler(); null when not started.
    Thread threadScheduler;

    private ODB<CrawlPool> dbCrawlPool;

    /// <summary>
    /// Creates the crawler, ensuring its working directories exist.
    /// On any failure the crawler is stopped and the original exception rethrown.
    /// </summary>
    /// <param name="skyScanner">Owning scanner; its BasePath anchors the crawler directory.</param>
    public Crawler(SkyScanner skyScanner)
    {
        SkyScanner = skyScanner;
        try
        {
            BasePath = Path.Combine(skyScanner.BasePath, "crawler");

            if (!Directory.Exists(BasePath))
                Directory.CreateDirectory(BasePath);
            if (!Directory.Exists(PerfPath))
                Directory.CreateDirectory(PerfPath);

            CrawlerState = ComponentState.INITIALIZED;
        }
        catch (Exception)
        {
            Stop();
            throw; // preserve original stack trace for the caller
        }
    }

    /// <summary>
    /// Opens the crawl pool database, sizes the worker pool and launches the
    /// scheduler thread. No-op while the crawler is in the STOPPING state.
    /// </summary>
    public void Start()
    {
        if (CrawlerState != ComponentState.STOPPING)
        {
            stopping = false;

            dbCrawlPool = new ODB<CrawlPool>(PoolPath);

            if (crawlThreadPool == null)
                crawlThreadPool = new Pool(128);
            else
                crawlThreadPool.SetPoolSize(128);

            if (threadScheduler == null)
            {
                threadScheduler = new Thread(scheduler);
                threadScheduler.Start();
            }
            CrawlerState = ComponentState.STARTED;
        }
    }

    /// <summary>
    /// Stops the crawler. A first call performs an orderly shutdown (join the
    /// scheduler, close the pool, persist state); a second call while already
    /// STOPPING force-aborts the worker pool. Failures are logged and move the
    /// crawler to the FAILED state.
    /// </summary>
    public void Stop()
    {
        try
        {
            if (CrawlerState == ComponentState.STOPPING)
            {
                // Already stopping: escalate to a hard abort of the workers.
                if (crawlThreadPool != null)
                {
                    crawlThreadPool.Abort();

                    if (crawlThreadPool.CurrentPoolSize == 0)
                        crawlThreadPool = null;
                }
            }
            else
            {
                CrawlerState = ComponentState.STOPPING;

                stopping = true;
                // threadScheduler is null when Start() was never called
                // (e.g. Stop() invoked from the constructor's catch block).
                if (threadScheduler != null)
                {
                    threadScheduler.Join();
                    threadScheduler = null;
                }

                if (crawlThreadPool != null)
                    crawlThreadPool.Close();

                // Nothing to persist if the pool database was never opened.
                if (dbCrawlPool != null)
                    Sync();

                stopping = false;
                CrawlerState = ComponentState.STOPPED;
            }
        }
        catch (Exception e)
        {
            Logging.Log(e);
            CrawlerState = ComponentState.FAILED;
        }
    }

    /// <summary>Persists the entire crawl pool root object.</summary>
    public void Sync()
    {
        Sync(CrawlPool);
    }

    /// <summary>
    /// Persists a single object to the crawl pool database, serialized
    /// under the pool-wide lock.
    /// </summary>
    /// <param name="persistent">Object to save.</param>
    public void Sync(IPersistent persistent)
    {
        // Guard: before Start() there is no database (CrawlPool is null and
        // lock(null) would throw ArgumentNullException).
        if (dbCrawlPool == null)
            return;

        lock (CrawlPool)
        {
            dbCrawlPool.SavePersistent(persistent);
        }
    }

    /// <summary>Queues an arbitrary job on the crawl worker pool.</summary>
    /// <param name="job">Delegate to execute.</param>
    public void Enqueue(JobDelegate job)
    {
        crawlThreadPool.Enqueue(job);
    }

    /// <summary>
    /// Queues a crawl for the given CIDR: a single-host crawl for a /32,
    /// otherwise a subnet crawl. Exceptions are logged, not propagated.
    /// </summary>
    /// <param name="cidr">Network or host address to crawl.</param>
    public void Crawl(CIDR cidr)
    {
        try
        {
            if (cidr.MaskWidth == 32)
            {
                CrawledHost crawledHost = CrawlPool.HostForIP(cidr);
                HostCrawl crawlHost = new HostCrawl(this, crawledHost);
                crawlThreadPool.Enqueue(crawlHost);
            }
            else
            {
                CrawledSubnet subnet = CrawlPool.GetSubnet(cidr);
                SubnetCrawl crawlSubnet = new SubnetCrawl(this, subnet);
                crawlThreadPool.Enqueue(crawlSubnet);
            }
        }
        catch (Exception e)
        {
            Logging.Log(e);
        }
    }

    /// <summary>
    /// Scheduler loop: periodically removes duplicate host entries, re-crawls
    /// hosts whose NextCheck has elapsed and rescans eligible subnets, until
    /// <see cref="stopping"/> is set.
    /// </summary>
    private void scheduler()
    {
        while (!stopping)
        {
            // Snapshot the host list first: RemoveHost() below would otherwise
            // modify the collection while it is being enumerated (the subnet
            // loop further down already snapshots for the same reason).
            foreach (CrawledHost crawledHost in CrawlPool.Hosts.ToArray())
            {
                CrawledHost canonicalHost = CrawlPool.HostForIP(crawledHost.PrimaryIP);
                if (crawledHost != canonicalHost)
                {
                    // A different host object is registered for this IP:
                    // this entry is a stale duplicate, drop it.
                    CrawlPool.RemoveHost(crawledHost);
                }
                else if (crawledHost.NextCheck < DateTime.Now)
                {
                    Crawl(crawledHost.PrimaryIP);
                }
            }

            foreach (CrawledSubnet subnet in CrawlPool.Subnets.ToArray())
            {
                // Only rescan sufficiently small subnets (/24 or narrower)
                // whose last scan is more than a day overdue.
                if (subnet.NextScan < (DateTime.Now - TimeSpan.FromDays(1)) && subnet.Network.MaskWidth >= 24)
                {
                    Crawl(subnet.Network);
                }
            }

            Thread.Sleep(2500);
        }
    }

    ///** PerfDB **/

    //Dictionary<string, PerfFile> perfFiles = new Dictionary<string, PerfFile>();

    //public PerfFile GetPerfFile(string name)
    //{
    //    if (perfFiles.ContainsKey(name))
    //        return perfFiles[name];

    //    PerfFile perfFile = new PerfFile(Path.Combine(PerfPath, String.Format("{0}.perf", name)));
    //    perfFile.Open();

    //    perfFiles.Add(name, perfFile);
    //    if (perfFile.FirstSection == null)
    //    {
    //        PerfFile.PerfFileSection section = new PerfFile.PerfFileSection(perfFile, null, 1440, 60, AggregationMethod.AVERAGE);
    //        section = new PerfFile.PerfFileSection(perfFile, section, 1728, 300, AggregationMethod.AVERAGE);
    //        section = new PerfFile.PerfFileSection(perfFile, section, 2016, 900, AggregationMethod.AVERAGE);
    //        section = new PerfFile.PerfFileSection(perfFile, section, 1344, 3600, AggregationMethod.AVERAGE);
    //        section = new PerfFile.PerfFileSection(perfFile, section, 1344, 10800, AggregationMethod.AVERAGE);
    //    }
    //    return perfFile;
    //}
}
}
2019-03-15 07:43:12 +01:00
// //SnmpV1Endpoint v1endpoint = new SnmpV1Endpoint(engine, new IPEndPoint(IPAddress.Parse("10.75.1.10"), 161), "ByFR4oW98hap");
// //SnmpV2Endpoint v2endpoint = new SnmpV2Endpoint(engine, new IPEndPoint(IPAddress.Parse("10.113.254.4"), 161), "ghE7wUmFPoPpkRno");
// USMEndpoint v3endpoint = new USMEndpoint(SNMPEngine, new IPEndPoint(host, 161));
// v3endpoint.AuthMethod = SnmpV3AuthMethod.SHA;
// v3endpoint.AuthKeyPhrase = "qVy3hnZJ2fov";
// v3endpoint.Username = "skytron";
// try
// {
// RFC1213.Interface[] interfaces = RFC1213.GetInterfaces(v3endpoint);
// foreach (RFC1213.Interface netIf in interfaces)
// {
// Logging.Log(LogLevel.INFO, "Interface: {0}", netIf);
// foreach (CIDR ip in netIf.IPAddresses)
// {
// Subnet subnet = CrawlPool.GetSubnet(ip.Network);
// if ((DateTime.Now - subnet.LastScanned).Hours >= 1)
// {
// Enqueue(() => crawlSubnet(ip.Network));
// }
// }
// }
// }
// catch (TimeoutException)
// {
// Logging.Log(LogLevel.INFO, "Host: {0} SNMP communication timed out.", host);
// }
//}