ln.tools.mailpdfextract/ln.tools.mailpdfextract/Program.cs

146 lines
5.9 KiB
C#

using System.Text.Json;
using MailKit;
using MailKit.Net.Imap;
using MailKit.Security;
using MimeKit;
using UglyToad.PdfPig;
namespace ln.tools.mailpdfextract
{
    /// <summary>
    /// Walks the IMAP inbox, stores every PDF attachment of not-yet-seen messages in
    /// <see cref="OutputDirectory"/> and, when a PDF embeds a <c>factur-x.xml</c> file,
    /// writes that XML next to the PDF.
    /// </summary>
    /// <remarks>
    /// Processed message IDs are persisted in <c>.mail.ids</c> inside the output directory so
    /// that later runs skip them. A second index, <c>.pdf.ids</c>, is loaded and saved as well,
    /// but nothing in this class adds entries to it.
    /// </remarks>
    public class MailPdfExtractApplication
    {
        private const string MailIdIndexFileName = ".mail.ids";
        private const string PdfIdIndexFileName = ".pdf.ids";
        private const string FacturXEmbeddedName = "factur-x.xml";

        /// <summary>Directory that receives the extracted files and the index files.</summary>
        public string OutputDirectory { get; set; }

        /// <summary>
        /// Upper bound on saved PDFs per run. The limit is checked after each message, so a
        /// message with several PDFs may push the count past it.
        /// </summary>
        public int MaxItemsPerRun { get; set; } = 1000;

        private readonly ImapCredentials _credentials;
        private HashSet<string> _mailIdSet = new HashSet<string>();
        private HashSet<string> _pdfHashSet = new HashSet<string>();

        /// <summary>Creates the application for the given output directory and IMAP account.</summary>
        /// <param name="outputDirectory">Directory that receives PDFs, XML files and the indices.</param>
        /// <param name="credentials">Host, port, user and password of the IMAP account.</param>
        public MailPdfExtractApplication(string outputDirectory, ImapCredentials credentials)
        {
            _credentials = credentials;
            OutputDirectory = outputDirectory;
        }

        /// <summary>
        /// Loads the indices, connects to the IMAP server (STARTTLS), scans the inbox read-only
        /// and saves the indices once the scan has finished without an exception.
        /// </summary>
        public void Start()
        {
            Initialize();

            // Dispose the client even when a later call throws, so the connection is not leaked.
            using var imapClient = new ImapClient();
            imapClient.Connect(_credentials.Hostname, _credentials.Port, SecureSocketOptions.StartTls);
            imapClient.Authenticate(_credentials.User, _credentials.Password);

            int countItems = 0;
            var inbox = imapClient.Inbox;
            inbox.Open(FolderAccess.ReadOnly);

            for (int n = 0; n < inbox.Count; n++)
            {
                var message = inbox.GetMessage(n);
                if (TryMarkAsSeen(message.MessageId))
                {
                    // ",-32" pads the sender to a 32 character column; the former ":32" was a
                    // format specifier and had no effect on the column width.
                    Console.WriteLine($"Message: {message.Sender,-32} | {message.Subject}");
                    countItems += SavePdfAttachments(message);
                }

                if (countItems >= MaxItemsPerRun)
                    break;
            }

            imapClient.Disconnect(true);
            SaveIndeces();
        }

        /// <summary>
        /// Records <paramref name="messageId"/> as processed.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the message still has to be processed. Messages without a usable ID
        /// cannot be tracked and are therefore always reported as new instead of all being
        /// collapsed into one "already seen" entry.
        /// </returns>
        private bool TryMarkAsSeen(string messageId)
        {
            if (string.IsNullOrWhiteSpace(messageId))
                return true;

            return _mailIdSet.Add(messageId);
        }

        /// <summary>Saves every named PDF attachment of <paramref name="message"/>.</summary>
        /// <returns>Number of PDF files written.</returns>
        private int SavePdfAttachments(MimeMessage message)
        {
            int saved = 0;
            // NOTE(review): DateTimeOffset.Date keeps only the calendar date (time is midnight);
            // kept as in the original - confirm that dropping the time of day is intended.
            DateTime timestamp = message.Date.Date;

            foreach (var attachment in message.Attachments)
            {
                // Only leaf parts carry decodable content; skip anything else instead of casting blindly.
                if (attachment is not MimePart part || part.Content is null)
                    continue;

                // MIME types are case-insensitive, IsMimeType compares accordingly.
                if (!part.ContentType.IsMimeType("application", "pdf"))
                    continue;

                if (part.ContentDisposition?.FileName is not string rawFileName)
                    continue;

                if (!TryGetSafeFileName(rawFileName, out string pdfFileName))
                    continue;

                string pdfFullFileName = Path.Combine(OutputDirectory, pdfFileName);
                using (FileStream f = File.Create(pdfFullFileName))
                {
                    part.Content.DecodeTo(f);
                }
                File.SetCreationTime(pdfFullFileName, timestamp);
                File.SetLastWriteTime(pdfFullFileName, timestamp);

                ExtractFacturX(pdfFullFileName, pdfFileName, timestamp);
                saved++;
            }

            return saved;
        }

        /// <summary>
        /// Reduces an attachment name taken from the mail to its final path segment.
        /// </summary>
        /// <remarks>
        /// The name is sender-controlled; without this step a value such as <c>../x.pdf</c> or an
        /// absolute path would make <see cref="Path.Combine(string, string)"/> point outside
        /// <see cref="OutputDirectory"/>.
        /// </remarks>
        /// <returns><c>false</c> when no usable file name remains.</returns>
        private static bool TryGetSafeFileName(string attachmentName, out string safeName)
        {
            safeName = Path.GetFileName(attachmentName) ?? string.Empty;
            return !string.IsNullOrWhiteSpace(safeName) && safeName != "." && safeName != "..";
        }

        /// <summary>
        /// Opens the stored PDF and writes an embedded <c>factur-x.xml</c>, if present, to
        /// <c>&lt;pdf name without extension&gt;.factur-x.xml</c> in the output directory.
        /// </summary>
        private void ExtractFacturX(string pdfFullFileName, string pdfFileName, DateTime timestamp)
        {
            // PdfDocument is disposable; release it as soon as the embedded files are read.
            using PdfDocument document = PdfDocument.Open(pdfFullFileName);
            if (!document.Advanced.TryGetEmbeddedFiles(out var embeddedFiles))
                return;

            foreach (var embeddedFile in embeddedFiles)
            {
                Console.WriteLine(" ***** EMBEDDED ***** {0}", embeddedFile.Name);
                if (!string.Equals(embeddedFile.Name, FacturXEmbeddedName, StringComparison.Ordinal))
                    continue;

                string xfacturFileName = Path.Combine(
                    OutputDirectory,
                    Path.GetFileNameWithoutExtension(pdfFileName) + ".factur-x.xml"
                );
                File.WriteAllBytes(xfacturFileName, embeddedFile.Bytes.ToArray());
                File.SetCreationTime(xfacturFileName, timestamp);
                File.SetLastWriteTime(xfacturFileName, timestamp);
            }
        }

        /// <summary>
        /// Loads both index files from <see cref="OutputDirectory"/>. An index whose file does
        /// not exist keeps its current in-memory content.
        /// </summary>
        public void Initialize()
        {
            _mailIdSet = LoadIndex(MailIdIndexFileName) ?? _mailIdSet;
            _pdfHashSet = LoadIndex(PdfIdIndexFileName) ?? _pdfHashSet;
        }

        /// <summary>Writes both indices to <see cref="OutputDirectory"/>, one entry per line.</summary>
        public void SaveIndeces()
        {
            File.WriteAllLines(Path.Combine(OutputDirectory, MailIdIndexFileName), _mailIdSet);
            File.WriteAllLines(Path.Combine(OutputDirectory, PdfIdIndexFileName), _pdfHashSet);
        }

        /// <summary>
        /// Reads the non-blank lines of an index file, or returns <c>null</c> when it does not exist.
        /// </summary>
        private HashSet<string> LoadIndex(string indexFileName)
        {
            string fn = Path.Combine(OutputDirectory, indexFileName);
            if (!File.Exists(fn))
                return null!;

            return new HashSet<string>(File.ReadAllLines(fn).Where(l => !string.IsNullOrWhiteSpace(l)));
        }

        /// <summary>
        /// Program entry point: reads the IMAP credentials from a JSON file and runs one extraction.
        /// </summary>
        /// <param name="outputDirectory">Target directory; created when missing.</param>
        /// <param name="credentialsFile">
        /// JSON file with the credentials. When omitted, <c>mailpdfextract.json</c> in the user's
        /// application data folder is used.
        /// </param>
        /// <param name="maxDocuments">Overrides <see cref="MaxItemsPerRun"/> when not zero.</param>
        /// <exception cref="FileNotFoundException">No credentials file was given and the default one does not exist.</exception>
        /// <exception cref="InvalidDataException">The credentials file deserializes to <c>null</c>.</exception>
        public static void Main(string outputDirectory = ".", FileInfo credentialsFile = null, int maxDocuments = 0)
        {
            // CreateDirectory does nothing when the directory already exists.
            Directory.CreateDirectory(outputDirectory);

            if (credentialsFile is null)
            {
                string fn = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), "mailpdfextract.json");
                if (!File.Exists(fn))
                    throw new FileNotFoundException("no credential file found! " + fn);

                credentialsFile = new FileInfo(fn);
            }

            // Fail with a clear message here instead of a NullReferenceException during Connect.
            ImapCredentials credentials =
                JsonSerializer.Deserialize<ImapCredentials>(File.ReadAllText(credentialsFile.FullName))
                ?? throw new InvalidDataException("credential file contains no credentials: " + credentialsFile.FullName);

            MailPdfExtractApplication app = new MailPdfExtractApplication(outputDirectory, credentials);
            if (maxDocuments != 0)
                app.MaxItemsPerRun = maxDocuments;

            app.Start();
        }
    }
}