From 796680f62013d11f8aa786629a9e9a994f3ad575 Mon Sep 17 00:00:00 2001 From: Niclas Thobaben Date: Fri, 27 Nov 2020 22:02:28 +0100 Subject: [PATCH] WIP bassic parsing system --- .../java/de/nth/chronicle/gedcom/Gedcom.java | 3 +- .../exception/GedcomException.java | 2 +- .../exception/InvalidEncodingException.java | 2 +- .../exception/InvalidLineException.java | 2 +- .../exception/MissingBomException.java | 2 +- .../exception/MissingRecordException.java | 9 +++ .../exception/UnrecognisedGedcomForm.java | 9 +++ .../gedcom/parser/AddressParser.java | 22 +++++ .../chronicle/gedcom/parser/GedcomReader.java | 32 +++++++- .../gedcom/parser/GedcomReaderContext.java | 46 +++++++++++ .../gedcom/parser/GedcomRecordParser.java | 9 --- .../chronicle/gedcom/parser/HeaderParser.java | 80 +++++++++++++++++++ .../chronicle/gedcom/parser/RecordChunk.java | 32 ++++++++ .../chronicle/gedcom/parser/RecordParser.java | 15 ++++ .../{GedcomTokenizer.java => Tokenizer.java} | 22 +++-- .../nth/chronicle/gedcom/parser/Warning.java | 19 +++++ .../de/nth/chronicle/gedcom/type/Address.java | 21 +++++ .../chronicle/gedcom/type/GedcomFormType.java | 22 +++++ .../de/nth/chronicle/gedcom/type/Header.java | 52 ++++++++++++ .../gedcom/parser/GedcomReaderTests.java | 23 ++++++ .../gedcom/parser/GedcomTokenizerTests.java | 8 +- 21 files changed, 406 insertions(+), 26 deletions(-) rename src/main/java/de/nth/chronicle/gedcom/{parser => }/exception/GedcomException.java (87%) rename src/main/java/de/nth/chronicle/gedcom/{parser => }/exception/InvalidEncodingException.java (86%) rename src/main/java/de/nth/chronicle/gedcom/{parser => }/exception/InvalidLineException.java (88%) rename src/main/java/de/nth/chronicle/gedcom/{parser => }/exception/MissingBomException.java (76%) create mode 100644 src/main/java/de/nth/chronicle/gedcom/exception/MissingRecordException.java create mode 100644 src/main/java/de/nth/chronicle/gedcom/exception/UnrecognisedGedcomForm.java create mode 100644 src/main/java/de/nth/chronicle/gedcom/parser/AddressParser.java create mode 100644 src/main/java/de/nth/chronicle/gedcom/parser/GedcomReaderContext.java delete mode 100644 src/main/java/de/nth/chronicle/gedcom/parser/GedcomRecordParser.java create mode 100644 src/main/java/de/nth/chronicle/gedcom/parser/HeaderParser.java create mode 100644 src/main/java/de/nth/chronicle/gedcom/parser/RecordParser.java rename src/main/java/de/nth/chronicle/gedcom/parser/{GedcomTokenizer.java => Tokenizer.java} (88%) create mode 100644 src/main/java/de/nth/chronicle/gedcom/parser/Warning.java create mode 100644 src/main/java/de/nth/chronicle/gedcom/type/Address.java create mode 100644 src/main/java/de/nth/chronicle/gedcom/type/GedcomFormType.java create mode 100644 src/main/java/de/nth/chronicle/gedcom/type/Header.java create mode 100644 src/test/java/de/nth/chronicle/gedcom/parser/GedcomReaderTests.java diff --git a/src/main/java/de/nth/chronicle/gedcom/Gedcom.java b/src/main/java/de/nth/chronicle/gedcom/Gedcom.java index 4455e96..cb46c3c 100644 --- a/src/main/java/de/nth/chronicle/gedcom/Gedcom.java +++ b/src/main/java/de/nth/chronicle/gedcom/Gedcom.java @@ -1,5 +1,6 @@ package de.nth.chronicle.gedcom; +import de.nth.chronicle.gedcom.type.Header; import lombok.Builder; import lombok.Data; @@ -7,6 +8,6 @@ import lombok.Data; @Data public class Gedcom { - + private final Header header; } diff --git a/src/main/java/de/nth/chronicle/gedcom/parser/exception/GedcomException.java b/src/main/java/de/nth/chronicle/gedcom/exception/GedcomException.java similarity index 87% rename from src/main/java/de/nth/chronicle/gedcom/parser/exception/GedcomException.java rename to src/main/java/de/nth/chronicle/gedcom/exception/GedcomException.java index 02c1dc8..bcdbdde 100644 --- a/src/main/java/de/nth/chronicle/gedcom/parser/exception/GedcomException.java +++ b/src/main/java/de/nth/chronicle/gedcom/exception/GedcomException.java @@ -1,4 +1,4 @@ -package de.nth.chronicle.gedcom.parser.exception; +package de.nth.chronicle.gedcom.exception; public class GedcomException extends RuntimeException { diff --git a/src/main/java/de/nth/chronicle/gedcom/parser/exception/InvalidEncodingException.java b/src/main/java/de/nth/chronicle/gedcom/exception/InvalidEncodingException.java similarity index 86% rename from src/main/java/de/nth/chronicle/gedcom/parser/exception/InvalidEncodingException.java rename to src/main/java/de/nth/chronicle/gedcom/exception/InvalidEncodingException.java index 7d7d339..abd04b2 100644 --- a/src/main/java/de/nth/chronicle/gedcom/parser/exception/InvalidEncodingException.java +++ b/src/main/java/de/nth/chronicle/gedcom/exception/InvalidEncodingException.java @@ -1,4 +1,4 @@ -package de.nth.chronicle.gedcom.parser.exception; +package de.nth.chronicle.gedcom.exception; public class InvalidEncodingException extends GedcomException { diff --git a/src/main/java/de/nth/chronicle/gedcom/parser/exception/InvalidLineException.java b/src/main/java/de/nth/chronicle/gedcom/exception/InvalidLineException.java similarity index 88% rename from src/main/java/de/nth/chronicle/gedcom/parser/exception/InvalidLineException.java rename to src/main/java/de/nth/chronicle/gedcom/exception/InvalidLineException.java index 660a2c2..1e72f55 100644 --- a/src/main/java/de/nth/chronicle/gedcom/parser/exception/InvalidLineException.java +++ b/src/main/java/de/nth/chronicle/gedcom/exception/InvalidLineException.java @@ -1,4 +1,4 @@ -package de.nth.chronicle.gedcom.parser.exception; +package de.nth.chronicle.gedcom.exception; public class InvalidLineException extends GedcomException { diff --git a/src/main/java/de/nth/chronicle/gedcom/parser/exception/MissingBomException.java b/src/main/java/de/nth/chronicle/gedcom/exception/MissingBomException.java similarity index 76% rename from src/main/java/de/nth/chronicle/gedcom/parser/exception/MissingBomException.java rename to src/main/java/de/nth/chronicle/gedcom/exception/MissingBomException.java index 91a1226..6788639 100644 --- a/src/main/java/de/nth/chronicle/gedcom/parser/exception/MissingBomException.java +++ b/src/main/java/de/nth/chronicle/gedcom/exception/MissingBomException.java @@ -1,4 +1,4 @@ -package de.nth.chronicle.gedcom.parser.exception; +package de.nth.chronicle.gedcom.exception; public class MissingBomException extends GedcomException { diff --git a/src/main/java/de/nth/chronicle/gedcom/exception/MissingRecordException.java b/src/main/java/de/nth/chronicle/gedcom/exception/MissingRecordException.java new file mode 100644 index 0000000..f75c418 --- /dev/null +++ b/src/main/java/de/nth/chronicle/gedcom/exception/MissingRecordException.java @@ -0,0 +1,9 @@ +package de.nth.chronicle.gedcom.exception; + +public class MissingRecordException extends GedcomException { + + public MissingRecordException(String recordPath) { + super(String.format("Missing Record: '%s'!", recordPath)); + } + +} diff --git a/src/main/java/de/nth/chronicle/gedcom/exception/UnrecognisedGedcomForm.java b/src/main/java/de/nth/chronicle/gedcom/exception/UnrecognisedGedcomForm.java new file mode 100644 index 0000000..47c0ff2 --- /dev/null +++ b/src/main/java/de/nth/chronicle/gedcom/exception/UnrecognisedGedcomForm.java @@ -0,0 +1,9 @@ +package de.nth.chronicle.gedcom.exception; + +public class UnrecognisedGedcomForm extends GedcomException { + + public UnrecognisedGedcomForm(String form) { + super(String.format("Unrecognised GEDCOM Form '%s'!", form)); + } + +} diff --git a/src/main/java/de/nth/chronicle/gedcom/parser/AddressParser.java b/src/main/java/de/nth/chronicle/gedcom/parser/AddressParser.java new file mode 100644 index 0000000..d35dc9b --- /dev/null +++ b/src/main/java/de/nth/chronicle/gedcom/parser/AddressParser.java @@ -0,0 +1,22 @@ +package de.nth.chronicle.gedcom.parser; + +import de.nth.chronicle.gedcom.exception.GedcomException; +import de.nth.chronicle.gedcom.type.Address; + +public class AddressParser implements RecordParser
{ + + @Override + public Address parse(RecordChunk chunk) throws GedcomException { + + return Address.builder() + .line1(chunk.findFirstValue("ADR1").orElse(null)) + .line2(chunk.findFirstValue("ADR2").orElse(null)) + .line3(chunk.findFirstValue("ADR3").orElse(null)) + .city(chunk.findFirstValue("CITY").orElse(null)) + .state(chunk.findFirstValue("STAE").orElse(null)) + .postalCode(chunk.findFirstValue("POST").orElse(null)) + .country(chunk.findFirstValue("CTRY").orElse(null)) + .build(); + } + +} diff --git a/src/main/java/de/nth/chronicle/gedcom/parser/GedcomReader.java b/src/main/java/de/nth/chronicle/gedcom/parser/GedcomReader.java index 305b176..d1945fe 100644 --- a/src/main/java/de/nth/chronicle/gedcom/parser/GedcomReader.java +++ b/src/main/java/de/nth/chronicle/gedcom/parser/GedcomReader.java @@ -1,12 +1,38 @@ package de.nth.chronicle.gedcom.parser; import de.nth.chronicle.gedcom.Gedcom; -import de.nth.chronicle.gedcom.parser.exception.GedcomException; +import de.nth.chronicle.gedcom.exception.GedcomException; import java.io.InputStream; +import java.util.List; -public interface GedcomReader { +public class GedcomReader { + + private GedcomReaderContext context; + private Gedcom result; + + public GedcomReader() { + + } + + public Gedcom read(InputStream stream) throws GedcomException { + if(this.result != null) return this.result; + + Tokenizer tokenizer = new Tokenizer(stream); + List chunks = tokenizer.parseRecordsChunks(); + + GedcomReaderContext context = new GedcomReaderContext(chunks); + Gedcom.GedcomBuilder builder = context.getGedcomBuilder(); + + for(RecordChunk chunk : chunks) { + + + + } + + this.result = context.getGedcomBuilder().build(); + return this.result; + } - public Gedcom read(InputStream stream) throws GedcomException; } diff --git a/src/main/java/de/nth/chronicle/gedcom/parser/GedcomReaderContext.java b/src/main/java/de/nth/chronicle/gedcom/parser/GedcomReaderContext.java new file mode 100644 index 0000000..edf0b37 --- /dev/null +++ b/src/main/java/de/nth/chronicle/gedcom/parser/GedcomReaderContext.java @@ -0,0 +1,46 @@ +package de.nth.chronicle.gedcom.parser; + +import de.nth.chronicle.gedcom.Gedcom; +import lombok.Getter; + +import java.util.LinkedList; +import java.util.List; + +@Getter +public class GedcomReaderContext { + + private final Gedcom.GedcomBuilder gedcomBuilder; + private final List recordChunks; + + private final List warnings; + + private RecordChunk currentChunk; + private int index = -1; + + public boolean hasNext() { + return (this.index+1) < this.recordChunks.size(); + } + + public RecordChunk nextChunk() { + this.index++; + if(this.index >= this.recordChunks.size()) { + this.currentChunk = null; + return null; + } + this.currentChunk = this.recordChunks.get(this.index); + return this.currentChunk; + } + + public RecordChunk getChunk() { + return this.currentChunk; + } + + GedcomReaderContext(List chunks) { + this.gedcomBuilder = Gedcom.builder(); + this.warnings = new LinkedList<>(); + this.recordChunks = chunks; + } + + + +} diff --git a/src/main/java/de/nth/chronicle/gedcom/parser/GedcomRecordParser.java b/src/main/java/de/nth/chronicle/gedcom/parser/GedcomRecordParser.java deleted file mode 100644 index d784a32..0000000 --- a/src/main/java/de/nth/chronicle/gedcom/parser/GedcomRecordParser.java +++ /dev/null @@ -1,9 +0,0 @@ -package de.nth.chronicle.gedcom.parser; - -import de.nth.chronicle.gedcom.Gedcom; - -public interface GedcomRecordParser { - - public void parse(RecordChunk token, Gedcom.GedcomBuilder builder); - -} diff --git a/src/main/java/de/nth/chronicle/gedcom/parser/HeaderParser.java b/src/main/java/de/nth/chronicle/gedcom/parser/HeaderParser.java new file mode 100644 index 0000000..54e53a4 --- /dev/null +++ b/src/main/java/de/nth/chronicle/gedcom/parser/HeaderParser.java @@ -0,0 +1,80 @@ +package de.nth.chronicle.gedcom.parser; + +import de.nth.chronicle.gedcom.exception.GedcomException; +import de.nth.chronicle.gedcom.exception.MissingRecordException; +import de.nth.chronicle.gedcom.exception.UnrecognisedGedcomForm; +import de.nth.chronicle.gedcom.type.GedcomFormType; +import de.nth.chronicle.gedcom.type.Header; + +public class HeaderParser implements RecordParser
{ + + @Override + public Header parse(RecordChunk chunk) throws GedcomException { + + Header.HeaderBuilder builder = Header.builder(); + + for(RecordChunk subChunk : chunk.getSubRecords()) { + + switch(subChunk.getTag()) { + case "GEDC": + parseGedcomMeta(subChunk, builder); + break; + case "CHAR": + builder.encoding(subChunk.getValue()); + break; + case "SOUR": + parseGedcomSource(subChunk, builder); + break; + case "DATE": + break; + case "FILE": + builder.fileName(subChunk.getValue()); + break; + case "LANG": + builder.language(subChunk.getValue()); + break; + case "COPR": + builder.copyright(subChunk.getValue()); + break; + case "PLAC": + break; + case "NOTE": + builder.contentDescription(subChunk.getValue()); + break; + } + } + + return builder.build(); + } + + private void parseGedcomMeta(RecordChunk chunk, Header.HeaderBuilder builder) { + + GedcomFormType formType = GedcomFormType.forKey(chunk.findFirstValue("FORM") + .orElseThrow(() -> new MissingRecordException("HEAD.GEDC.FORM"))); + + if(formType == null) { + throw new UnrecognisedGedcomForm(chunk.findFirstValue("FORM").orElse(null)); + } + + builder.gedcomMeta(Header.Meta.builder() + .version(chunk.findFirstValue("VERS") + .orElseThrow(() -> new MissingRecordException("HEAD.GEDC.VERS"))) + .formType(formType) + .formVersion(chunk.findFirstValue("FORM.VERS") + .orElse(null)) + .build()); + } + + private void parseGedcomSource(RecordChunk chunk, Header.HeaderBuilder builder) { + + + builder.source(Header.Source.builder() + .systemId(chunk.getValue()) + .version(chunk.findFirstValue("VERS").orElse(null)) + .productName(chunk.findFirstValue("NAME").orElse(null)) + .businessName(chunk.findFirstValue("CORP").orElse(null)) + .address(chunk.findFirst("CORP.ADDR").map(RecordParser.ADDRESS::parse).orElse(null)) + .build()); + } + +} diff --git a/src/main/java/de/nth/chronicle/gedcom/parser/RecordChunk.java b/src/main/java/de/nth/chronicle/gedcom/parser/RecordChunk.java index c501348..4fe4258 100644 --- a/src/main/java/de/nth/chronicle/gedcom/parser/RecordChunk.java +++ b/src/main/java/de/nth/chronicle/gedcom/parser/RecordChunk.java @@ -13,13 +13,45 @@ public class RecordChunk { private Map recordIndex; + private int lineNumber; + private String sourceLine; + private int level; private String tag; private String value; + private RecordChunk previous, next; + @Builder.Default private List subRecords = new LinkedList<>(); + /** + * Normalizes CONT & CONC Sub Records by appending the line value + * to the parent record line value + */ + void normalize() { + + if(this.value == null) { + this.subRecords.forEach(record -> record.normalize()); + return; + } + + StringBuilder sb = new StringBuilder(this.value); + Set delete = new HashSet<>(); + for(RecordChunk chunk : this.subRecords) { + if(chunk.getTag().equals("CONT")) { + sb.append(System.lineSeparator()).append(chunk.getValue()); + delete.add(chunk); + }else if(chunk.getTag().equals("CONC")) { + sb.append(chunk.getValue()); + delete.add(chunk); + } + chunk.normalize(); + } + this.subRecords.removeAll(delete); + this.value = sb.toString(); + } + public Optional findFirstValue(String tag) { return findFirst(tag) .map(record -> record.getValue()); diff --git a/src/main/java/de/nth/chronicle/gedcom/parser/RecordParser.java b/src/main/java/de/nth/chronicle/gedcom/parser/RecordParser.java new file mode 100644 index 0000000..bb7e111 --- /dev/null +++ b/src/main/java/de/nth/chronicle/gedcom/parser/RecordParser.java @@ -0,0 +1,15 @@ +package de.nth.chronicle.gedcom.parser; + +import de.nth.chronicle.gedcom.exception.GedcomException; +import de.nth.chronicle.gedcom.type.Address; +import de.nth.chronicle.gedcom.type.Header; + +public interface RecordParser { + + public T parse(RecordChunk chunk) throws GedcomException; + + + public static RecordParser
HEADER = new HeaderParser(); + public static RecordParser
ADDRESS = new AddressParser(); + +} diff --git a/src/main/java/de/nth/chronicle/gedcom/parser/GedcomTokenizer.java b/src/main/java/de/nth/chronicle/gedcom/parser/Tokenizer.java similarity index 88% rename from src/main/java/de/nth/chronicle/gedcom/parser/GedcomTokenizer.java rename to src/main/java/de/nth/chronicle/gedcom/parser/Tokenizer.java index f9b9c7e..82a9669 100644 --- a/src/main/java/de/nth/chronicle/gedcom/parser/GedcomTokenizer.java +++ b/src/main/java/de/nth/chronicle/gedcom/parser/Tokenizer.java @@ -1,8 +1,8 @@ package de.nth.chronicle.gedcom.parser; -import de.nth.chronicle.gedcom.parser.exception.GedcomException; -import de.nth.chronicle.gedcom.parser.exception.InvalidLineException; -import de.nth.chronicle.gedcom.parser.exception.MissingBomException; +import de.nth.chronicle.gedcom.exception.GedcomException; +import de.nth.chronicle.gedcom.exception.InvalidLineException; +import de.nth.chronicle.gedcom.exception.MissingBomException; import de.nth.chronicle.gedcom.util.EncodingUtils; import java.io.BufferedReader; @@ -14,7 +14,7 @@ import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; -class GedcomTokenizer { +class Tokenizer { // ^\s*(\p{Digit})+\s+([a-zA-Z1-9_]+)(?:\s(.*)$)? public static final Pattern LINE_REGEX = Pattern.compile("^\\s*(\\p{Digit})+\\s+([a-zA-Z1-9_@]+)(?:\\s(.*))?"); @@ -26,7 +26,9 @@ class GedcomTokenizer { private final Stack stack = new Stack<>(); private final BufferedReader reader; - public GedcomTokenizer(InputStream stream) { + private RecordChunk lastChunk; + + public Tokenizer(InputStream stream) { Charset charset = validateEncoding(stream); this.reader = new BufferedReader(new InputStreamReader(stream, charset)); @@ -61,6 +63,8 @@ class GedcomTokenizer { this.currentLineNumber++; } + this.records.forEach(record -> record.normalize()); + return this.records; } @@ -86,6 +90,8 @@ class GedcomTokenizer { .level(level) .tag(tag) .value(value) + .lineNumber(this.currentLineNumber) + .sourceLine(this.currentLine) .build(); return record; @@ -115,6 +121,12 @@ class GedcomTokenizer { this.stack.peek().getSubRecords().add(record); this.stack.push(record); } + + if(this.lastChunk != null) { + this.lastChunk.setNext(record); + } + record.setPrevious(this.lastChunk); + this.lastChunk = record; } /** diff --git a/src/main/java/de/nth/chronicle/gedcom/parser/Warning.java b/src/main/java/de/nth/chronicle/gedcom/parser/Warning.java new file mode 100644 index 0000000..f88395b --- /dev/null +++ b/src/main/java/de/nth/chronicle/gedcom/parser/Warning.java @@ -0,0 +1,19 @@ +package de.nth.chronicle.gedcom.parser; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.Getter; + +@AllArgsConstructor +@Getter +public class Warning { + + private final String message; + private final String line; + private final int lineNumber; + + @Override + public String toString() { + return String.format("Warning: %s '%s' at line %d", this.message, this.line, this.lineNumber); + } +} diff --git a/src/main/java/de/nth/chronicle/gedcom/type/Address.java b/src/main/java/de/nth/chronicle/gedcom/type/Address.java new file mode 100644 index 0000000..9ec7ee4 --- /dev/null +++ b/src/main/java/de/nth/chronicle/gedcom/type/Address.java @@ -0,0 +1,21 @@ +package de.nth.chronicle.gedcom.type; + +import lombok.Builder; +import lombok.Data; + +import java.util.List; + +@Data +@Builder +public class Address { + + private String line1; /* ADR1 */ + private String line2; /* ADR2 */ + private String line3; /* ADR3 */ + + private String city; /* CITY */ + private String state; /* STAE */ + private String postalCode; /* POST */ + private String country; /* CTRY */ + +} diff --git a/src/main/java/de/nth/chronicle/gedcom/type/GedcomFormType.java b/src/main/java/de/nth/chronicle/gedcom/type/GedcomFormType.java new file mode 100644 index 0000000..fd44ebf --- /dev/null +++ b/src/main/java/de/nth/chronicle/gedcom/type/GedcomFormType.java @@ -0,0 +1,22 @@ +package de.nth.chronicle.gedcom.type; + +public enum GedcomFormType { + + LINEAGE_LINKED("LINEAGE-LINKED"); + + private String key; + GedcomFormType(String key) { this.key = key; } + + public String getKey() { return this.key; } + + public static GedcomFormType forKey(String key) { + + for(GedcomFormType type : values()) { + if(type.getKey().equals(key)) { + return type; + } + } + return null; + } + +} diff --git a/src/main/java/de/nth/chronicle/gedcom/type/Header.java b/src/main/java/de/nth/chronicle/gedcom/type/Header.java new file mode 100644 index 0000000..cb87a65 --- /dev/null +++ b/src/main/java/de/nth/chronicle/gedcom/type/Header.java @@ -0,0 +1,52 @@ +package de.nth.chronicle.gedcom.type; + +import lombok.Builder; +import lombok.Data; + +import java.time.LocalDate; +import java.time.LocalTime; + +@Data +@Builder +public class Header { + + private String encoding; /* CHAR */ + private String contentDescription; /* NOTE */ + private String language; /* LANG */ + private String copyright; /* COPR */ + private String fileName; /* FILE */ + private LocalDate transmissionDate; /* DATE */ + private LocalTime transmissionTime; /* TIME */ + + private Meta gedcomMeta; /* GEDC */ + private Source source; /* SOUR */ + + @Data + @Builder + public static class Meta { + + private String version; /* VERS */ + private GedcomFormType formType; /* FORM */ + private String formVersion; /* */ + + } + + @Data + @Builder + public static class Source { + + private String systemId; /* SOUR */ + private String version; /* VERS */ + private String productName; /* NAME */ + private String businessName; /* CORP */ + private Address address; /* ADDR */ + private String name; /* DATA */ + private LocalDate publicationDate; /* DATE */ + private LocalTime publicationTime; /* TIME */ + private String copyright; /* COPR */ + + } + + + +} diff --git a/src/test/java/de/nth/chronicle/gedcom/parser/GedcomReaderTests.java b/src/test/java/de/nth/chronicle/gedcom/parser/GedcomReaderTests.java new file mode 100644 index 0000000..b49c62e --- /dev/null +++ b/src/test/java/de/nth/chronicle/gedcom/parser/GedcomReaderTests.java @@ -0,0 +1,23 @@ +package de.nth.chronicle.gedcom.parser; + +import de.nth.chronicle.gedcom.Gedcom; +import org.junit.jupiter.api.Test; + +import java.io.InputStream; + +public class GedcomReaderTests { + + @Test + void testMinimalGedcom() { + + InputStream stream = GedcomReaderTests.class.getResourceAsStream("/examples/555SAMPLE.ged"); + + GedcomReader reader = new GedcomReader(); + + Gedcom gedcom = reader.read(stream); + + System.out.println(gedcom); + + } + +} diff --git a/src/test/java/de/nth/chronicle/gedcom/parser/GedcomTokenizerTests.java b/src/test/java/de/nth/chronicle/gedcom/parser/GedcomTokenizerTests.java index 690ae93..8236395 100644 --- a/src/test/java/de/nth/chronicle/gedcom/parser/GedcomTokenizerTests.java +++ b/src/test/java/de/nth/chronicle/gedcom/parser/GedcomTokenizerTests.java @@ -43,16 +43,14 @@ public class GedcomTokenizerTests { } void validateLine(String line) { - assertTrue(GedcomTokenizer.matchLine(line).matches(), () -> String.format("Invalid Line: '%s'", line)); + assertTrue(Tokenizer.matchLine(line).matches(), () -> String.format("Invalid Line: '%s'", line)); } @Test void testBasicTokenizerFunctionality() throws Exception { InputStream stream = GedcomTokenizerTests.class.getResourceAsStream("/examples/MINIMAL555.ged"); - - GedcomTokenizer tokenizer = new GedcomTokenizer(stream); - + Tokenizer tokenizer = new Tokenizer(stream); List records = null; try { @@ -61,6 +59,8 @@ public class GedcomTokenizerTests { e.printStackTrace(); } + + assertEquals(3, records.size()); }