// gedcom-api/src/test/java/de/nth/chronicle/gedcom/parser/GedcomTokenizerTests.java
//
// 69 lines
// 2.5 KiB
// Java

package de.nth.chronicle.gedcom.parser;
import org.junit.jupiter.api.Test;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.function.Consumer;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Tests for {@link Tokenizer}, run against the example {@code .ged} files that are
 * looked up under {@code /examples} on the test classpath.
 */
public class GedcomTokenizerTests {

    /**
     * Opens a classpath resource as a {@link BufferedReader}, hands it to {@code consumer},
     * and closes the reader afterwards.
     *
     * <p>The reader is closed even when the consumer throws (for example because an
     * assertion inside it fails), so a failing test does not leak the underlying stream.
     *
     * @param resource classpath location of the resource, resolved relative to this class
     * @param charset  charset used to decode the resource bytes
     * @param consumer callback that receives the open reader
     * @throws Exception if closing the reader fails or the consumer throws
     */
    void useResourceReader(String resource, Charset charset, Consumer<BufferedReader> consumer) throws Exception {
        InputStream stream = GedcomTokenizerTests.class.getResourceAsStream(resource);
        // getResourceAsStream returns null for a missing resource; fail with the resource
        // name instead of an anonymous NullPointerException from the reader constructor.
        assertNotNull(stream, () -> String.format("Test resource not found: '%s'", resource));
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream, charset))) {
            consumer.accept(reader);
        }
    }

    /**
     * Same as {@link #useResourceReader(String, Charset, Consumer)}, decoding the resource as UTF-8.
     *
     * @param resource classpath location of the resource, resolved relative to this class
     * @param consumer callback that receives the open reader
     * @throws Exception if closing the reader fails or the consumer throws
     */
    void useResourceReader(String resource, Consumer<BufferedReader> consumer) throws Exception {
        useResourceReader(resource, StandardCharsets.UTF_8, consumer);
    }

    /**
     * Checks that every line of each example file is accepted by {@link Tokenizer#matchLine}.
     * The two {@code 16BE}/{@code 16LE} samples are decoded with the matching UTF-16 charset,
     * all other files as UTF-8.
     */
    @Test
    void testBasicTokenizerLineRegex() throws Exception {
        validateAllLines("/examples/MINIMAL555.ged", StandardCharsets.UTF_8);
        validateAllLines("/examples/555SAMPLE.ged", StandardCharsets.UTF_8);
        validateAllLines("/examples/555SAMPLE16BE.ged", StandardCharsets.UTF_16BE);
        validateAllLines("/examples/555SAMPLE16LE.ged", StandardCharsets.UTF_16LE);
        validateAllLines("/examples/REMARR.ged", StandardCharsets.UTF_8);
        validateAllLines("/examples/SSMARR.ged", StandardCharsets.UTF_8);
        validateAllLines("/examples/555SAMPLE_formatted.ged", StandardCharsets.UTF_8);
    }

    /** Runs {@link #validateLine(String)} on every line of the given resource. */
    private void validateAllLines(String resource, Charset charset) throws Exception {
        useResourceReader(resource, charset, reader -> reader.lines().forEach(this::validateLine));
    }

    /**
     * Asserts that the result of {@link Tokenizer#matchLine} for the given line reports a match.
     *
     * @param line a single line read from an example file
     */
    void validateLine(String line) {
        assertTrue(Tokenizer.matchLine(line).matches(), () -> String.format("Invalid Line: '%s'", line));
    }

    /**
     * Tokenizes {@code MINIMAL555.ged} and expects exactly three record chunks.
     *
     * <p>Any exception from the tokenizer propagates and fails the test with its real
     * cause, rather than being printed and followed by a misleading NullPointerException.
     */
    @Test
    void testBasicTokenizerFunctionality() throws Exception {
        String resource = "/examples/MINIMAL555.ged";
        InputStream resourceStream = GedcomTokenizerTests.class.getResourceAsStream(resource);
        assertNotNull(resourceStream, () -> String.format("Test resource not found: '%s'", resource));

        // NOTE(review): whether Tokenizer closes the stream itself is not visible here;
        // closing it again via try-with-resources is harmless for an InputStream.
        try (InputStream stream = resourceStream) {
            Tokenizer tokenizer = new Tokenizer(stream);
            List<RecordChunk> records = tokenizer.parseRecordsChunks();

            assertNotNull(records, "parseRecordsChunks() returned null");
            assertEquals(3, records.size());
        }
    }
}