commit 117cfbc3f3923d63d9940fd1e4826e64a5945e65 Author: haraldwolff Date: Tue Jan 30 09:19:27 2024 +0100 Initial Commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..add57be --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +bin/ +obj/ +/packages/ +riderModule.iml +/_ReSharper.Caches/ \ No newline at end of file diff --git a/.idea/.idea.ln.tools.mailpdfextract/.idea/.gitignore b/.idea/.idea.ln.tools.mailpdfextract/.idea/.gitignore new file mode 100644 index 0000000..9e3f41c --- /dev/null +++ b/.idea/.idea.ln.tools.mailpdfextract/.idea/.gitignore @@ -0,0 +1,13 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Rider ignored files +/modules.xml +/projectSettingsUpdater.xml +/contentModel.xml +/.idea.ln.tools.mailpdfextract.iml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/.idea.ln.tools.mailpdfextract/.idea/indexLayout.xml b/.idea/.idea.ln.tools.mailpdfextract/.idea/indexLayout.xml new file mode 100644 index 0000000..7b08163 --- /dev/null +++ b/.idea/.idea.ln.tools.mailpdfextract/.idea/indexLayout.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/.idea.ln.tools.mailpdfextract/.idea/vcs.xml b/.idea/.idea.ln.tools.mailpdfextract/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/.idea.ln.tools.mailpdfextract/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/ln.tools.mailpdfextract.sln b/ln.tools.mailpdfextract.sln new file mode 100644 index 0000000..effed32 --- /dev/null +++ b/ln.tools.mailpdfextract.sln @@ -0,0 +1,16 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ln.tools.mailpdfextract", "ln.tools.mailpdfextract\ln.tools.mailpdfextract.csproj", "{BDBB4B03-EF53-4019-9695-48968CAFA6CF}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {BDBB4B03-EF53-4019-9695-48968CAFA6CF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {BDBB4B03-EF53-4019-9695-48968CAFA6CF}.Debug|Any CPU.Build.0 = Debug|Any CPU + {BDBB4B03-EF53-4019-9695-48968CAFA6CF}.Release|Any CPU.ActiveCfg = Release|Any CPU + {BDBB4B03-EF53-4019-9695-48968CAFA6CF}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection +EndGlobal diff --git a/ln.tools.mailpdfextract/HashingStream.cs b/ln.tools.mailpdfextract/HashingStream.cs new file mode 100644 index 0000000..b55e447 --- /dev/null +++ b/ln.tools.mailpdfextract/HashingStream.cs @@ -0,0 +1,57 @@ +using System.Security.Cryptography; + +namespace ln.tools.mailpdfextract; + +public class HashingStream : Stream, IDisposable +{ + public Stream BaseStream { get; } + + SHA256 _sha256 = SHA256.Create(); + + public byte[] Hash + { + get + { + Dispose(); + return _sha256.Hash; + } + } + + public HashingStream(Stream baseStream) + { + BaseStream = baseStream; + } + + public override void Flush() => BaseStream.Flush(); + public override int Read(byte[] buffer, int offset, int count) => throw new NotSupportedException(); + public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException(); + public override void SetLength(long value) => throw new NotSupportedException(); + + public override void Write(byte[] buffer, int offset, int count) + { + _sha256.TransformBlock(buffer, offset, count, null, 0); + BaseStream.Write(buffer, offset, count); + } + + public override bool CanRead => false; + public override bool CanSeek => false; + public override bool CanWrite => true; + public override long Length => BaseStream.Length; + + public override long Position + { + get => BaseStream.Position; + set => throw new NotSupportedException(); + } + + public void Dispose() + { + if (BaseStream is Stream) + { + _sha256.TransformFinalBlock(null, 0, 0); + BaseStream?.Dispose(); + base.Dispose(); + } + } + +} \ No newline at end of file diff --git a/ln.tools.mailpdfextract/ImapCredentials.cs b/ln.tools.mailpdfextract/ImapCredentials.cs new file mode 100644 index 0000000..d197945 --- /dev/null +++ b/ln.tools.mailpdfextract/ImapCredentials.cs @@ -0,0 +1,10 @@ +namespace ln.tools.mailpdfextract; + +public class ImapCredentials +{ + public string Hostname { get; set; } = "localhost"; + public int Port { get; set; } = 143; + + public string User { get; set; } = "nobody"; + public string Password { get; set; } = ""; +} \ No newline at end of file diff --git a/ln.tools.mailpdfextract/Program.cs b/ln.tools.mailpdfextract/Program.cs new file mode 100644 index 0000000..0240f0a --- /dev/null +++ b/ln.tools.mailpdfextract/Program.cs @@ -0,0 +1,146 @@ +using System.Text.Json; +using MailKit; +using MailKit.Net.Imap; +using MailKit.Security; +using MimeKit; +using UglyToad.PdfPig; + +namespace ln.tools.mailpdfextract +{ + + public class MailPdfExtractApplication + { + public string OutputDirectory { get; set; } + + public int MaxItemsPerRun { get; set; } = 1000; + + private ImapCredentials _credentials; + + + + public MailPdfExtractApplication(string outputDirectory, ImapCredentials credentials) + { + _credentials = credentials; + OutputDirectory = outputDirectory; + } + + private HashSet _mailIdSet = new HashSet(); + private HashSet _pdfHashSet = new HashSet(); + + public void Start() + { + Initialize(); + + ImapClient imapClient = new ImapClient(); + imapClient.Connect(_credentials.Hostname, _credentials.Port, SecureSocketOptions.StartTls); + imapClient.Authenticate(_credentials.User, _credentials.Password); + + int countItems = 0; + + var inbox = imapClient.Inbox; + inbox.Open(FolderAccess.ReadOnly); + + for (int n = 0; n < inbox.Count; n++) + { + var message = inbox.GetMessage(n); + if (!_mailIdSet.Contains(message.MessageId)) + { + _mailIdSet.Add(message.MessageId); + Console.WriteLine($"Message: {message.Sender:32} | {message.Subject}"); + + foreach (var attachment in message.Attachments) + { + if (attachment.ContentType.MimeType.Equals("application/pdf")) + { + if (attachment.ContentDisposition.FileName is string pdfFileName) + { + string pdfFullFileName = Path.Combine(OutputDirectory, pdfFileName); + using (FileStream f = File.Create(pdfFullFileName)) + { + ((MimePart)attachment).Content.DecodeTo(f); + } + + File.SetCreationTime(pdfFullFileName, message.Date.Date); + File.SetLastWriteTime(pdfFullFileName, message.Date.Date); + + PdfDocument document = PdfDocument.Open(pdfFullFileName); + if (document.Advanced.TryGetEmbeddedFiles(out var embeddedFiles)) + { + foreach (var embeddedFile in embeddedFiles) + { + Console.WriteLine(" ***** EMBEDDED ***** {0}",embeddedFile.Name); + if (embeddedFile.Name.Equals("factur-x.xml")) + { + string xfacturFileName = Path.Combine( + OutputDirectory, + Path.GetFileNameWithoutExtension(pdfFileName) + ".factur-x.xml" + ); + using (FileStream fs = new FileStream(xfacturFileName, FileMode.Create)) + fs.Write(embeddedFile.Bytes.ToArray()); + + File.SetCreationTime(xfacturFileName, message.Date.Date); + File.SetLastWriteTime(xfacturFileName, message.Date.Date); + } + } + } + + countItems++; + } + } + } + } + + if (countItems >= MaxItemsPerRun) + break; + } + + imapClient.Disconnect(true); + SaveIndeces(); + } + + public void Initialize() + { + var fn = Path.Combine(OutputDirectory, ".mail.ids"); + if (File.Exists(fn)) + _mailIdSet = new HashSet(File.ReadAllLines(fn).Where(l => (l.Trim().Length != 0))); + + fn = Path.Combine(OutputDirectory, ".pdf.ids"); + if (File.Exists(fn)) + _pdfHashSet = new HashSet(File.ReadAllLines(fn).Where(l => (l.Trim().Length != 0))); + } + + public void SaveIndeces() + { + var fn = Path.Combine(OutputDirectory, ".mail.ids"); + File.WriteAllLines(fn, _mailIdSet.ToArray()); + fn = Path.Combine(OutputDirectory, ".pdf.ids"); + File.WriteAllLines(fn, _pdfHashSet.ToArray()); + } + + + public static void Main(string outputDirectory = ".", FileInfo credentialsFile = null, int maxDocuments = 0) + { + if (!Directory.Exists(outputDirectory)) + Directory.CreateDirectory(outputDirectory); + + if (credentialsFile is null) + { + string fn = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData),"mailpdfextract.json"); + if (File.Exists(fn)) + credentialsFile = new FileInfo(fn); + else + throw new FileNotFoundException("no credential file found! " + fn); + } + + ImapCredentials credentials = JsonSerializer.Deserialize(File.ReadAllText(credentialsFile.FullName))!; + + MailPdfExtractApplication app = new MailPdfExtractApplication(outputDirectory, credentials); + + if (maxDocuments != 0) + app.MaxItemsPerRun = maxDocuments; + + app.Start(); + } + } + +} \ No newline at end of file diff --git a/ln.tools.mailpdfextract/ln.tools.mailpdfextract.csproj b/ln.tools.mailpdfextract/ln.tools.mailpdfextract.csproj new file mode 100644 index 0000000..ae0ccc8 --- /dev/null +++ b/ln.tools.mailpdfextract/ln.tools.mailpdfextract.csproj @@ -0,0 +1,16 @@ + + + + Exe + net7.0 + enable + enable + + + + + + + + +