Initial Commit

master
Harald Wolff 2024-01-30 09:19:27 +01:00
commit 117cfbc3f3
9 changed files with 277 additions and 0 deletions

5
.gitignore vendored 100644
View File

@ -0,0 +1,5 @@
bin/
obj/
/packages/
riderModule.iml
/_ReSharper.Caches/

View File

@ -0,0 +1,13 @@
# Default ignored files
/shelf/
/workspace.xml
# Rider ignored files
/modules.xml
/projectSettingsUpdater.xml
/contentModel.xml
/.idea.ln.tools.mailpdfextract.iml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="UserContentModel">
<attachedFolders />
<explicitIncludes />
<explicitExcludes />
</component>
</project>

View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

View File

@ -0,0 +1,16 @@

Microsoft Visual Studio Solution File, Format Version 12.00
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ln.tools.mailpdfextract", "ln.tools.mailpdfextract\ln.tools.mailpdfextract.csproj", "{BDBB4B03-EF53-4019-9695-48968CAFA6CF}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{BDBB4B03-EF53-4019-9695-48968CAFA6CF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{BDBB4B03-EF53-4019-9695-48968CAFA6CF}.Debug|Any CPU.Build.0 = Debug|Any CPU
{BDBB4B03-EF53-4019-9695-48968CAFA6CF}.Release|Any CPU.ActiveCfg = Release|Any CPU
{BDBB4B03-EF53-4019-9695-48968CAFA6CF}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,57 @@
using System.Security.Cryptography;
namespace ln.tools.mailpdfextract;
/// <summary>
/// Write-only stream decorator that forwards every written byte to
/// <see cref="BaseStream"/> while feeding the same bytes into a SHA-256 computation.
/// </summary>
/// <remarks>
/// Reading <see cref="Hash"/> or disposing the instance finalizes the digest and
/// disposes <see cref="BaseStream"/>; writing afterwards throws
/// <see cref="ObjectDisposedException"/>.
/// </remarks>
public class HashingStream : Stream
{
    private readonly SHA256 _sha256 = SHA256.Create();

    // Digest captured during disposal; the SHA256 instance itself is disposed
    // afterwards and can no longer be queried.
    private byte[]? _hash;
    private bool _disposed;

    /// <summary>The stream that receives all bytes written to this instance.</summary>
    public Stream BaseStream { get; }

    /// <summary>
    /// SHA-256 digest of all bytes written so far. Accessing this property
    /// disposes the stream (and <see cref="BaseStream"/>); repeated reads return
    /// the same digest.
    /// </summary>
    /// <exception cref="InvalidOperationException">The digest could not be captured.</exception>
    public byte[] Hash
    {
        get
        {
            Dispose();
            byte[] digest = _hash ?? throw new InvalidOperationException("The hash has not been computed.");
            // Hand out a copy so callers cannot mutate the cached digest.
            return (byte[])digest.Clone();
        }
    }

    /// <summary>Creates a hashing wrapper around <paramref name="baseStream"/>.</summary>
    /// <param name="baseStream">Destination stream; it is disposed together with this instance.</param>
    /// <exception cref="ArgumentNullException"><paramref name="baseStream"/> is null.</exception>
    public HashingStream(Stream baseStream)
    {
        BaseStream = baseStream ?? throw new ArgumentNullException(nameof(baseStream));
    }

    /// <inheritdoc />
    public override bool CanRead => false;

    /// <inheritdoc />
    public override bool CanSeek => false;

    /// <inheritdoc />
    public override bool CanWrite => true;

    /// <inheritdoc />
    public override long Length => BaseStream.Length;

    /// <summary>Current position of <see cref="BaseStream"/>; setting it is not supported.</summary>
    public override long Position
    {
        get => BaseStream.Position;
        set => throw new NotSupportedException();
    }

    /// <inheritdoc />
    public override void Flush() => BaseStream.Flush();

    /// <summary>Not supported; this stream is write-only.</summary>
    public override int Read(byte[] buffer, int offset, int count) => throw new NotSupportedException();

    /// <summary>Not supported; this stream is not seekable.</summary>
    public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();

    /// <summary>Not supported.</summary>
    public override void SetLength(long value) => throw new NotSupportedException();

    /// <summary>
    /// Adds the given bytes to the running hash and writes them to <see cref="BaseStream"/>.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The stream was disposed or its hash was already read.</exception>
    public override void Write(byte[] buffer, int offset, int count)
    {
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(HashingStream));
        }

        _sha256.TransformBlock(buffer, offset, count, null, 0);
        BaseStream.Write(buffer, offset, count);
    }

    /// <summary>
    /// Finalizes the hash, then releases the hash algorithm and <see cref="BaseStream"/>.
    /// Safe to call more than once.
    /// </summary>
    protected override void Dispose(bool disposing)
    {
        if (!_disposed)
        {
            _disposed = true;
            if (disposing)
            {
                try
                {
                    // TransformFinalBlock rejects a null buffer, so pass an empty one.
                    _sha256.TransformFinalBlock(Array.Empty<byte>(), 0, 0);
                    _hash = _sha256.Hash;
                }
                finally
                {
                    _sha256.Dispose();
                    BaseStream.Dispose();
                }
            }
        }

        base.Dispose(disposing);
    }
}

View File

@ -0,0 +1,10 @@
namespace ln.tools.mailpdfextract;
/// <summary>
/// Settings needed to connect and log in to an IMAP server. Every property has a
/// default, so an instance is usable even when individual values are not supplied.
/// </summary>
public class ImapCredentials
{
    private const string DefaultHostname = "localhost";
    private const int DefaultPort = 143;
    private const string DefaultUser = "nobody";

    /// <summary>Host name of the IMAP server; defaults to <c>localhost</c>.</summary>
    public string Hostname { get; set; } = DefaultHostname;

    /// <summary>TCP port of the IMAP server; defaults to <c>143</c>.</summary>
    public int Port { get; set; } = DefaultPort;

    /// <summary>Login name; defaults to <c>nobody</c>.</summary>
    public string User { get; set; } = DefaultUser;

    /// <summary>Login password; defaults to the empty string.</summary>
    public string Password { get; set; } = string.Empty;
}

View File

@ -0,0 +1,146 @@
using System.Text.Json;
using MailKit;
using MailKit.Net.Imap;
using MailKit.Security;
using MimeKit;
using UglyToad.PdfPig;
namespace ln.tools.mailpdfextract
{
/// <summary>
/// Connects to an IMAP inbox, stores PDF attachments of not-yet-seen messages in
/// <see cref="OutputDirectory"/> and extracts an embedded <c>factur-x.xml</c> next
/// to each PDF that contains one.
/// </summary>
/// <remarks>
/// Processed message IDs are persisted in <c>.mail.ids</c> inside the output
/// directory so that later runs skip them.
/// </remarks>
public class MailPdfExtractApplication
{
    private const string MailIdIndexFileName = ".mail.ids";
    private const string PdfHashIndexFileName = ".pdf.ids";
    private const string PdfMimeType = "application/pdf";
    private const string FacturXFileName = "factur-x.xml";

    private readonly ImapCredentials _credentials;

    private HashSet<string> _mailIdSet = new HashSet<string>();

    // NOTE(review): this set is loaded from and saved to ".pdf.ids" but nothing in
    // this class ever adds to it - presumably reserved for de-duplicating PDFs by hash.
    private HashSet<string> _pdfHashSet = new HashSet<string>();

    /// <summary>Directory that receives the extracted files and the index files.</summary>
    public string OutputDirectory { get; set; }

    /// <summary>
    /// Upper bound of PDF attachments stored per run; checked after each message,
    /// so the last message may push the count slightly above the limit.
    /// </summary>
    public int MaxItemsPerRun { get; set; } = 1000;

    /// <summary>Creates the application.</summary>
    /// <param name="outputDirectory">Directory for extracted files and index files.</param>
    /// <param name="credentials">IMAP connection and login settings.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public MailPdfExtractApplication(string outputDirectory, ImapCredentials credentials)
    {
        _credentials = credentials ?? throw new ArgumentNullException(nameof(credentials));
        OutputDirectory = outputDirectory ?? throw new ArgumentNullException(nameof(outputDirectory));
    }

    /// <summary>
    /// Runs one extraction pass over the inbox: loads the indices, processes every
    /// message not seen before, then disconnects and saves the indices.
    /// </summary>
    public void Start()
    {
        Initialize();

        // Dispose the client even when connecting or processing fails.
        using ImapClient imapClient = new ImapClient();
        try
        {
            imapClient.Connect(_credentials.Hostname, _credentials.Port, SecureSocketOptions.StartTls);
            imapClient.Authenticate(_credentials.User, _credentials.Password);

            var inbox = imapClient.Inbox;
            inbox.Open(FolderAccess.ReadOnly);

            int countItems = 0;
            for (int n = 0; n < inbox.Count; n++)
            {
                var message = inbox.GetMessage(n);
                string? messageId = message.MessageId;

                // Messages without a Message-Id cannot be tracked, so they are
                // processed on every run instead of being collapsed into one null key.
                bool trackable = !string.IsNullOrEmpty(messageId);
                if (!trackable || !_mailIdSet.Contains(messageId!))
                {
                    Console.WriteLine($"Message: {message.Sender,-32} | {message.Subject}");
                    countItems += SavePdfAttachments(message);

                    // Record the ID only after the message was handled completely, so
                    // a failure in the middle of a message leads to a retry next run.
                    if (trackable)
                    {
                        _mailIdSet.Add(messageId!);
                    }
                }

                if (countItems >= MaxItemsPerRun)
                {
                    break;
                }
            }

            imapClient.Disconnect(true);
        }
        finally
        {
            // Keep the progress made so far even if the run was aborted by an exception.
            SaveIndeces();
        }
    }

    /// <summary>Stores all named PDF attachments of <paramref name="message"/>.</summary>
    /// <returns>Number of PDF files written.</returns>
    private int SavePdfAttachments(MimeMessage message)
    {
        // NOTE(review): DateTimeOffset.Date keeps only the calendar day (time 00:00);
        // use LocalDateTime/UtcDateTime instead if the full timestamp is wanted.
        DateTime timestamp = message.Date.Date;
        int saved = 0;

        foreach (var attachment in message.Attachments)
        {
            if (attachment is not MimePart part || part.Content is null)
            {
                continue;
            }

            // MIME types are case-insensitive.
            if (!string.Equals(part.ContentType.MimeType, PdfMimeType, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            // The name comes from the mail and is untrusted: strip any directory
            // part so the file cannot be written outside of OutputDirectory.
            string? pdfFileName = Path.GetFileName(part.ContentDisposition?.FileName);
            if (string.IsNullOrEmpty(pdfFileName))
            {
                continue;
            }

            string pdfFullFileName = Path.Combine(OutputDirectory, pdfFileName);
            using (FileStream pdfStream = File.Create(pdfFullFileName))
            {
                part.Content.DecodeTo(pdfStream);
            }

            ApplyTimestamp(pdfFullFileName, timestamp);
            ExtractFacturX(pdfFullFileName, pdfFileName, timestamp);
            saved++;
        }

        return saved;
    }

    /// <summary>
    /// Writes the embedded <c>factur-x.xml</c> of the given PDF, if present, to
    /// <c>&lt;pdf name without extension&gt;.factur-x.xml</c> in the output directory.
    /// </summary>
    private void ExtractFacturX(string pdfFullFileName, string pdfFileName, DateTime timestamp)
    {
        using PdfDocument document = PdfDocument.Open(pdfFullFileName);
        if (!document.Advanced.TryGetEmbeddedFiles(out var embeddedFiles))
        {
            return;
        }

        foreach (var embeddedFile in embeddedFiles)
        {
            Console.WriteLine(" ***** EMBEDDED ***** {0}", embeddedFile.Name);
            if (!string.Equals(embeddedFile.Name, FacturXFileName, StringComparison.Ordinal))
            {
                continue;
            }

            string xfacturFileName = Path.Combine(
                OutputDirectory,
                Path.GetFileNameWithoutExtension(pdfFileName) + ".factur-x.xml");

            File.WriteAllBytes(xfacturFileName, embeddedFile.Bytes.ToArray());
            ApplyTimestamp(xfacturFileName, timestamp);
        }
    }

    /// <summary>Sets creation and last-write time of <paramref name="path"/>.</summary>
    private static void ApplyTimestamp(string path, DateTime timestamp)
    {
        File.SetCreationTime(path, timestamp);
        File.SetLastWriteTime(path, timestamp);
    }

    /// <summary>
    /// Loads the index files from <see cref="OutputDirectory"/>. An index whose
    /// file does not exist keeps its current in-memory content.
    /// </summary>
    public void Initialize()
    {
        _mailIdSet = LoadIndex(MailIdIndexFileName) ?? _mailIdSet;
        _pdfHashSet = LoadIndex(PdfHashIndexFileName) ?? _pdfHashSet;
    }

    /// <summary>Writes both indices to <see cref="OutputDirectory"/>, one entry per line.</summary>
    public void SaveIndeces()
    {
        File.WriteAllLines(Path.Combine(OutputDirectory, MailIdIndexFileName), _mailIdSet);
        File.WriteAllLines(Path.Combine(OutputDirectory, PdfHashIndexFileName), _pdfHashSet);
    }

    /// <summary>
    /// Reads the non-blank lines of an index file, or returns null when the file
    /// does not exist.
    /// </summary>
    private HashSet<string>? LoadIndex(string indexFileName)
    {
        string path = Path.Combine(OutputDirectory, indexFileName);
        if (!File.Exists(path))
        {
            return null;
        }

        return new HashSet<string>(File.ReadAllLines(path).Where(line => !string.IsNullOrWhiteSpace(line)));
    }

    /// <summary>
    /// Extracts PDF attachments (and embedded factur-x.xml files) from an IMAP inbox.
    /// </summary>
    /// <param name="outputDirectory">Target directory; created if it does not exist.</param>
    /// <param name="credentialsFile">
    /// JSON file with the IMAP credentials. Defaults to <c>mailpdfextract.json</c> in
    /// the user's application data folder.
    /// </param>
    /// <param name="maxDocuments">Maximum number of PDFs per run; values of 0 or less keep the default.</param>
    /// <exception cref="FileNotFoundException">No credentials file was given and the default one does not exist.</exception>
    /// <exception cref="InvalidDataException">The credentials file does not contain a credentials object.</exception>
    public static void Main(string outputDirectory = ".", FileInfo? credentialsFile = null, int maxDocuments = 0)
    {
        // CreateDirectory is a no-op when the directory already exists.
        Directory.CreateDirectory(outputDirectory);

        if (credentialsFile is null)
        {
            string fn = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), "mailpdfextract.json");
            if (!File.Exists(fn))
            {
                throw new FileNotFoundException("no credential file found! " + fn);
            }

            credentialsFile = new FileInfo(fn);
        }

        // Deserialize returns null for a JSON "null" document; fail with a clear message.
        ImapCredentials credentials =
            JsonSerializer.Deserialize<ImapCredentials>(File.ReadAllText(credentialsFile.FullName))
            ?? throw new InvalidDataException("credential file contains no credentials: " + credentialsFile.FullName);

        MailPdfExtractApplication app = new MailPdfExtractApplication(outputDirectory, credentials);
        if (maxDocuments > 0)
        {
            app.MaxItemsPerRun = maxDocuments;
        }

        app.Start();
    }
}
}

View File

@ -0,0 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net7.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="MailKit" Version="4.3.0" />
<PackageReference Include="PdfPig" Version="0.1.9-alpha-20240128-f886e" />
<PackageReference Include="System.CommandLine.DragonFruit" Version="0.4.0-alpha.22272.1" />
</ItemGroup>
</Project>