FEAT new Parsing mechanism and Header impl
parent
796680f620
commit
1a31eba56b
|
@ -4,10 +4,9 @@ import de.nth.chronicle.gedcom.type.Header;
|
|||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
@Builder(toBuilder = true)
|
||||
@Data
|
||||
public class Gedcom {
|
||||
|
||||
private final Header header;
|
||||
private Header header;
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
package de.nth.chronicle.gedcom.parser;
|
||||
|
||||
import de.nth.chronicle.gedcom.exception.GedcomException;
|
||||
|
||||
public abstract class AbstractRecordParser<T> extends ParserMapper implements RecordParser<T> {
|
||||
|
||||
protected abstract T getTargetElement();
|
||||
|
||||
@Override
|
||||
public T parse(RecordChunk chunk) throws GedcomException {
|
||||
parseChunk(chunk); //parse own chunk as well
|
||||
chunk.getSubRecords().forEach(this::parseChunk);
|
||||
return getTargetElement();
|
||||
}
|
||||
}
|
|
@ -1,22 +1,24 @@
|
|||
package de.nth.chronicle.gedcom.parser;
|
||||
|
||||
import de.nth.chronicle.gedcom.exception.GedcomException;
|
||||
import de.nth.chronicle.gedcom.type.Address;
|
||||
|
||||
public class AddressParser implements RecordParser<Address> {
|
||||
public class AddressParser extends AbstractRecordParser<Address> {
|
||||
|
||||
@Override
|
||||
public Address parse(RecordChunk chunk) throws GedcomException {
|
||||
private Address address = new Address();
|
||||
|
||||
return Address.builder()
|
||||
.line1(chunk.findFirstValue("ADR1").orElse(null))
|
||||
.line2(chunk.findFirstValue("ADR2").orElse(null))
|
||||
.line3(chunk.findFirstValue("ADR3").orElse(null))
|
||||
.city(chunk.findFirstValue("CITY").orElse(null))
|
||||
.state(chunk.findFirstValue("STAE").orElse(null))
|
||||
.postalCode(chunk.findFirstValue("POST").orElse(null))
|
||||
.country(chunk.findFirstValue("CTRY").orElse(null))
|
||||
.build();
|
||||
public AddressParser() {
|
||||
// TODO ? map(Tag.ADDRESS, RecordParser::stringParser, this.address::setLine1);
|
||||
map(Tag.ADDRESS_LINE_1, RecordParser::stringParser, this.address::setLine1);
|
||||
map(Tag.ADDRESS_LINE_2, RecordParser::stringParser, this.address::setLine2);
|
||||
map(Tag.ADDRESS_LINE_3, RecordParser::stringParser, this.address::setLine3);
|
||||
map(Tag.ADDRESS_CITY, RecordParser::stringParser, this.address::setCity);
|
||||
map(Tag.ADDRESS_STATE, RecordParser::stringParser, this.address::setState);
|
||||
map(Tag.ADDRESS_POSTCODE, RecordParser::stringParser, this.address::setPostalCode);
|
||||
map(Tag.ADDRESS_COUNTRY, RecordParser::stringParser, this.address::setCountry);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Address getTargetElement() {
|
||||
return this.address;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
package de.nth.chronicle.gedcom.parser;
|
||||
|
||||
import de.nth.chronicle.gedcom.exception.GedcomException;
|
||||
import de.nth.chronicle.gedcom.type.Address;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
public abstract class AddressableRecordParser<T> extends AbstractRecordParser<T> {
|
||||
|
||||
private List<String> phoneNumbers = new ArrayList<>();
|
||||
private List<String> faxNumbers = new ArrayList<>();
|
||||
private List<String> emails = new ArrayList<>();
|
||||
private List<String> webPages = new ArrayList<>();
|
||||
|
||||
public AddressableRecordParser() {
|
||||
map(Tag.ADDRESS, AddressParser::new, this::consumeAddress);
|
||||
map(Tag.PHONE_NUMBER, () -> RecordParser.stringParser(), this.phoneNumbers::add);
|
||||
map(Tag.FAX_NUMBER, () -> RecordParser.stringParser(), this.faxNumbers::add);
|
||||
map(Tag.EMAIL_ADDRESS, () -> RecordParser.stringParser(), this.emails::add);
|
||||
map(Tag.WEB_PAGE, () -> RecordParser.stringParser(), this.webPages::add);
|
||||
/**
|
||||
* FamilySearch PAF uses GEDCOM 5.5.1, but PAF addresses do not use the WWW tag
|
||||
* specified here, they use the illegal tag URL instead. That is an error in PAF, and
|
||||
* FamilySearch should have fixed PAF. Instead, FamilySearch “fixed” the GEDCOM
|
||||
* specification: in GEDCOM 5.6, the tag has changed from WWW to URL.
|
||||
*/
|
||||
map(Tag.WEB_URL, () -> RecordParser.stringParser(), this.webPages::add);
|
||||
}
|
||||
|
||||
@Override
|
||||
public T parse(RecordChunk chunk) throws GedcomException {
|
||||
T result = super.parse(chunk);
|
||||
consumePhoneNumbers(this.phoneNumbers);
|
||||
consumeFaxNumbers(this.faxNumbers);
|
||||
consumeEmails(this.emails);
|
||||
consumeWebPages(this.webPages);
|
||||
return result;
|
||||
}
|
||||
|
||||
protected void consumeAddress(Address address) {}
|
||||
protected void consumePhoneNumbers(List<String> phoneNumbers) {}
|
||||
protected void consumeFaxNumbers(List<String> faxNumbers) {}
|
||||
protected void consumeEmails(List<String> emails) {}
|
||||
protected void consumeWebPages(List<String> webPages) {}
|
||||
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
package de.nth.chronicle.gedcom.parser;
|
||||
|
||||
import de.nth.chronicle.gedcom.type.Address;
|
||||
import de.nth.chronicle.gedcom.type.Corporation;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class CorporationParser extends AddressableRecordParser<Corporation>{
|
||||
|
||||
private Corporation corporation = new Corporation();
|
||||
|
||||
public CorporationParser() {
|
||||
super();
|
||||
map(Tag.CORPORATION, RecordParser::stringParser, this.corporation::setBusinessName);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void consumeAddress(Address address) {
|
||||
this.corporation.setAddress(address);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void consumePhoneNumbers(List<String> phoneNumbers) {
|
||||
this.corporation.setPhoneNumbers(phoneNumbers);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void consumeFaxNumbers(List<String> faxNumbers) {
|
||||
this.corporation.setFaxNumbers(faxNumbers);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void consumeEmails(List<String> emails) {
|
||||
this.corporation.setEmails(emails);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void consumeWebPages(List<String> webPages) {
|
||||
this.corporation.setWebPages(webPages);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Corporation getTargetElement() {
|
||||
return this.corporation;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
package de.nth.chronicle.gedcom.parser;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.LocalTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeFormatterBuilder;
|
||||
|
||||
//TODO first primitve implementation...
|
||||
public class DateParser extends AbstractRecordParser<LocalDateTime> {
|
||||
|
||||
public static final DateTimeFormatter DATE_FORMATTER = new DateTimeFormatterBuilder()
|
||||
.parseCaseInsensitive()
|
||||
.appendPattern("d MMM uuuu")
|
||||
.toFormatter();
|
||||
|
||||
private LocalDate date;
|
||||
private LocalTime time;
|
||||
|
||||
public DateParser() {
|
||||
map(Tag.DATE, () -> RecordParser.typedParser(DateParser::parseDate), date -> this.date = date);
|
||||
map(Tag.TIME, () -> RecordParser.typedParser(LocalTime::parse), time -> this.time = time);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected LocalDateTime getTargetElement() {
|
||||
LocalTime time = this.time != null ? this.time : LocalTime.of(0, 0);
|
||||
return LocalDateTime.of(this.date, time);
|
||||
}
|
||||
|
||||
private static LocalDate parseDate(String input) {
|
||||
return LocalDate.parse(input, DATE_FORMATTER);
|
||||
}
|
||||
}
|
|
@ -6,33 +6,25 @@ import de.nth.chronicle.gedcom.exception.GedcomException;
|
|||
import java.io.InputStream;
|
||||
import java.util.List;
|
||||
|
||||
public class GedcomReader {
|
||||
public class GedcomReader extends ParserMapper {
|
||||
|
||||
private GedcomReaderContext context;
|
||||
private Gedcom result;
|
||||
private Gedcom gedcom = new Gedcom();
|
||||
|
||||
public GedcomReader() {
|
||||
|
||||
map(Tag.HEAD, HeaderParser::new, this.gedcom::setHeader);
|
||||
}
|
||||
|
||||
public Gedcom read(InputStream stream) throws GedcomException {
|
||||
if(this.result != null) return this.result;
|
||||
|
||||
Tokenizer tokenizer = new Tokenizer(stream);
|
||||
List<RecordChunk> chunks = tokenizer.parseRecordsChunks();
|
||||
|
||||
GedcomReaderContext context = new GedcomReaderContext(chunks);
|
||||
Gedcom.GedcomBuilder builder = context.getGedcomBuilder();
|
||||
|
||||
for(RecordChunk chunk : chunks) {
|
||||
|
||||
|
||||
|
||||
parseChunk(chunk);
|
||||
}
|
||||
|
||||
this.result = context.getGedcomBuilder().build();
|
||||
return this.result;
|
||||
return this.gedcom;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -1,46 +0,0 @@
|
|||
package de.nth.chronicle.gedcom.parser;
|
||||
|
||||
import de.nth.chronicle.gedcom.Gedcom;
|
||||
import lombok.Getter;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
@Getter
|
||||
public class GedcomReaderContext {
|
||||
|
||||
private final Gedcom.GedcomBuilder gedcomBuilder;
|
||||
private final List<RecordChunk> recordChunks;
|
||||
|
||||
private final List<Warning> warnings;
|
||||
|
||||
private RecordChunk currentChunk;
|
||||
private int index = -1;
|
||||
|
||||
public boolean hasNext() {
|
||||
return (this.index+1) < this.recordChunks.size();
|
||||
}
|
||||
|
||||
public RecordChunk nextChunk() {
|
||||
this.index++;
|
||||
if(this.index >= this.recordChunks.size()) {
|
||||
this.currentChunk = null;
|
||||
return null;
|
||||
}
|
||||
this.currentChunk = this.recordChunks.get(this.index);
|
||||
return this.currentChunk;
|
||||
}
|
||||
|
||||
public RecordChunk getChunk() {
|
||||
return this.currentChunk;
|
||||
}
|
||||
|
||||
GedcomReaderContext(List<RecordChunk> chunks) {
|
||||
this.gedcomBuilder = Gedcom.builder();
|
||||
this.warnings = new LinkedList<>();
|
||||
this.recordChunks = chunks;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -1,80 +1,27 @@
|
|||
package de.nth.chronicle.gedcom.parser;
|
||||
|
||||
import de.nth.chronicle.gedcom.exception.GedcomException;
|
||||
import de.nth.chronicle.gedcom.exception.MissingRecordException;
|
||||
import de.nth.chronicle.gedcom.exception.UnrecognisedGedcomForm;
|
||||
import de.nth.chronicle.gedcom.type.GedcomFormType;
|
||||
import de.nth.chronicle.gedcom.type.Header;
|
||||
|
||||
public class HeaderParser implements RecordParser<Header> {
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
public class HeaderParser extends AbstractRecordParser<Header> {
|
||||
|
||||
private Header header = new Header();
|
||||
|
||||
public HeaderParser() {
|
||||
map(Tag.FILENAME, RecordParser::stringParser, this.header::setFileName);
|
||||
map(Tag.ENCODING, () -> RecordParser.typedParser(Charset::forName), this.header::setEncoding);
|
||||
map(Tag.LANGUAGE, RecordParser::stringParser, this.header::setLanguage);
|
||||
map(Tag.DESTINATION, RecordParser::stringParser, this.header::setDestination);
|
||||
map(Tag.NOTE, RecordParser::stringParser, this.header::setContentDescription);
|
||||
map(Tag.SOURCE, SourceSystemParser::new, this.header::setSourceSystem);
|
||||
map(Tag.DATE, DateParser::new, this.header::setTransmissionDateTime);
|
||||
map(Tag.GEDCOM_VERSION, HeaderVersionParser::new, this.header::setGedcomVersion);
|
||||
map(Tag.PLACE, PlaceParser::new, this.header::setPlace);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Header parse(RecordChunk chunk) throws GedcomException {
|
||||
|
||||
Header.HeaderBuilder builder = Header.builder();
|
||||
|
||||
for(RecordChunk subChunk : chunk.getSubRecords()) {
|
||||
|
||||
switch(subChunk.getTag()) {
|
||||
case "GEDC":
|
||||
parseGedcomMeta(subChunk, builder);
|
||||
break;
|
||||
case "CHAR":
|
||||
builder.encoding(subChunk.getValue());
|
||||
break;
|
||||
case "SOUR":
|
||||
parseGedcomSource(subChunk, builder);
|
||||
break;
|
||||
case "DATE":
|
||||
break;
|
||||
case "FILE":
|
||||
builder.fileName(subChunk.getValue());
|
||||
break;
|
||||
case "LANG":
|
||||
builder.language(subChunk.getValue());
|
||||
break;
|
||||
case "COPR":
|
||||
builder.copyright(subChunk.getValue());
|
||||
break;
|
||||
case "PLAC":
|
||||
break;
|
||||
case "NOTE":
|
||||
builder.contentDescription(subChunk.getValue());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return builder.build();
|
||||
protected Header getTargetElement() {
|
||||
return this.header;
|
||||
}
|
||||
|
||||
private void parseGedcomMeta(RecordChunk chunk, Header.HeaderBuilder builder) {
|
||||
|
||||
GedcomFormType formType = GedcomFormType.forKey(chunk.findFirstValue("FORM")
|
||||
.orElseThrow(() -> new MissingRecordException("HEAD.GEDC.FORM")));
|
||||
|
||||
if(formType == null) {
|
||||
throw new UnrecognisedGedcomForm(chunk.findFirstValue("FORM").orElse(null));
|
||||
}
|
||||
|
||||
builder.gedcomMeta(Header.Meta.builder()
|
||||
.version(chunk.findFirstValue("VERS")
|
||||
.orElseThrow(() -> new MissingRecordException("HEAD.GEDC.VERS")))
|
||||
.formType(formType)
|
||||
.formVersion(chunk.findFirstValue("FORM.VERS")
|
||||
.orElse(null))
|
||||
.build());
|
||||
}
|
||||
|
||||
private void parseGedcomSource(RecordChunk chunk, Header.HeaderBuilder builder) {
|
||||
|
||||
|
||||
builder.source(Header.Source.builder()
|
||||
.systemId(chunk.getValue())
|
||||
.version(chunk.findFirstValue("VERS").orElse(null))
|
||||
.productName(chunk.findFirstValue("NAME").orElse(null))
|
||||
.businessName(chunk.findFirstValue("CORP").orElse(null))
|
||||
.address(chunk.findFirst("CORP.ADDR").map(RecordParser.ADDRESS::parse).orElse(null))
|
||||
.build());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
package de.nth.chronicle.gedcom.parser;
|
||||
|
||||
import de.nth.chronicle.gedcom.type.FormType;
|
||||
import de.nth.chronicle.gedcom.type.HeaderVersion;
|
||||
|
||||
public class HeaderVersionParser extends AbstractRecordParser<HeaderVersion>{
|
||||
|
||||
private HeaderVersion version = new HeaderVersion();
|
||||
|
||||
public HeaderVersionParser() {
|
||||
map(Tag.VERSION, RecordParser::stringParser, this.version::setVersion);
|
||||
map(Tag.FORM, () -> RecordParser.typedParser(FormType::forKey), this.version::setFormType);
|
||||
//TODO map Form Version under FORM.VERS
|
||||
}
|
||||
|
||||
@Override
|
||||
protected HeaderVersion getTargetElement() {
|
||||
return this.version;
|
||||
}
|
||||
}
|
|
@ -1,28 +0,0 @@
|
|||
package de.nth.chronicle.gedcom.parser;
|
||||
|
||||
public abstract class InvalidGedcomException extends RuntimeException {
|
||||
|
||||
public InvalidGedcomException(String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
public static class MissingBOM extends InvalidGedcomException {
|
||||
public MissingBOM() {
|
||||
super("GEDCOM 5.5.5 file lacks Byte Order Mark!");
|
||||
}
|
||||
}
|
||||
|
||||
public static class InvalidLine extends InvalidGedcomException {
|
||||
public InvalidLine(int line, String content) {
|
||||
super(String.format("Invalid Gedcom: '%s' at line %d!", content, line));
|
||||
}
|
||||
}
|
||||
|
||||
public static class InvalidOrMissingRecord extends InvalidGedcomException {
|
||||
public InvalidOrMissingRecord(String tag) {
|
||||
super(String.format("Record '%s' is missing or invalid!", tag));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -6,14 +6,20 @@ import lombok.Getter;
|
|||
|
||||
@AllArgsConstructor
|
||||
@Getter
|
||||
public class Warning {
|
||||
public class ParseError {
|
||||
|
||||
public enum Type {
|
||||
WARNING,
|
||||
ERROR
|
||||
}
|
||||
|
||||
private final Type type;
|
||||
private final String message;
|
||||
private final String line;
|
||||
private final int lineNumber;
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("Warning: %s '%s' at line %d", this.message, this.line, this.lineNumber);
|
||||
return String.format("%s: %s '%s' at line %d", this.type, this.message, this.line, this.lineNumber);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
package de.nth.chronicle.gedcom.parser;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
public abstract class ParserMapper {
|
||||
|
||||
@AllArgsConstructor
|
||||
private static class Mapping<T> {
|
||||
|
||||
Supplier<RecordParser<T>> parserSupplier;
|
||||
Consumer<T> consumer;
|
||||
|
||||
void parse(RecordChunk chunk) {
|
||||
T result = this.parserSupplier.get().parse(chunk);
|
||||
this.consumer.accept(result);
|
||||
}
|
||||
}
|
||||
|
||||
private final Map<Tag, Mapping<?>> mappings = new HashMap<>();
|
||||
|
||||
protected void parseChunk(RecordChunk chunk) {
|
||||
// 1. Find Parser for tag
|
||||
// 2. Parse chunk
|
||||
// 3. Consume Result
|
||||
|
||||
if(this.mappings.containsKey(chunk.getTag())) {
|
||||
Mapping<?> mapping = this.mappings.get(chunk.getTag());
|
||||
mapping.parse(chunk);
|
||||
}
|
||||
}
|
||||
|
||||
protected <T> void map(Tag tag, Supplier<RecordParser<T>> parserSupplier, Consumer<T> consumer) {
|
||||
this.mappings.put(tag, new Mapping<>(parserSupplier, consumer));
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
package de.nth.chronicle.gedcom.parser;
|
||||
|
||||
import de.nth.chronicle.gedcom.type.Place;
|
||||
|
||||
public class PlaceParser extends AbstractRecordParser<Place> {
|
||||
|
||||
private Place place = new Place();
|
||||
|
||||
public PlaceParser() {
|
||||
map(Tag.PLACE, RecordParser::stringParser, this.place::setName);
|
||||
map(Tag.PLACE_PHONETIC, () -> this::parseTranscription, this.place.getPhonetic()::add);
|
||||
map(Tag.PLACE_ROMANISED, () -> this::parseTranscription, this.place.getRomanised()::add);
|
||||
map(Tag.PLACE_MAP, () -> this::parseLocation, this.place::setLocation);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Place getTargetElement() {
|
||||
return this.place;
|
||||
}
|
||||
|
||||
private Place.Transcription parseTranscription(RecordChunk chunk) {
|
||||
Place.Transcription transcript = new Place.Transcription();
|
||||
transcript.setText(chunk.getValue());
|
||||
chunk.findChunk(ch -> ch.getTag() == Tag.TRANSCRIPT_METHOD)
|
||||
.map(typeChunk -> typeChunk.getValue())
|
||||
.ifPresent(transcript::setMethod);
|
||||
return transcript;
|
||||
}
|
||||
|
||||
private Place.Location parseLocation(RecordChunk chunk) {
|
||||
Place.Location loc = new Place.Location();
|
||||
|
||||
chunk.findChunk(ch -> ch.getTag() == Tag.PLACE_LATITUDE)
|
||||
.map(lat -> lat.getValue())
|
||||
.ifPresent(loc::setLatitude);
|
||||
|
||||
chunk.findChunk(ch -> ch.getTag() == Tag.PLACE_LONGITUDE)
|
||||
.map(lat -> lat.getValue())
|
||||
.ifPresent(loc::setLongitude);
|
||||
|
||||
return loc;
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
package de.nth.chronicle.gedcom.parser;
|
||||
|
||||
import de.nth.chronicle.gedcom.Gedcom;
|
||||
import lombok.Getter;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
@Getter
|
||||
public class ReaderContext {
|
||||
|
||||
private Gedcom gedcom = new Gedcom();
|
||||
private final List<ParseError> errors = new LinkedList<>();
|
||||
private final List<RecordChunk> chunks;
|
||||
|
||||
ReaderContext(List<RecordChunk> chunks) {
|
||||
this.chunks = chunks;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -5,19 +5,18 @@ import lombok.Getter;
|
|||
import lombok.Setter;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@Builder
|
||||
public class RecordChunk {
|
||||
|
||||
private Map<String, RecordChunk> recordIndex;
|
||||
|
||||
private int lineNumber;
|
||||
private String sourceLine;
|
||||
|
||||
private int level;
|
||||
private String tag;
|
||||
private Tag tag;
|
||||
private String value;
|
||||
|
||||
private RecordChunk previous, next;
|
||||
|
@ -39,10 +38,10 @@ public class RecordChunk {
|
|||
StringBuilder sb = new StringBuilder(this.value);
|
||||
Set<RecordChunk> delete = new HashSet<>();
|
||||
for(RecordChunk chunk : this.subRecords) {
|
||||
if(chunk.getTag().equals("CONT")) {
|
||||
if(chunk.getTag() == Tag.CONTINUE) {
|
||||
sb.append(System.lineSeparator()).append(chunk.getValue());
|
||||
delete.add(chunk);
|
||||
}else if(chunk.getTag().equals("CONC")) {
|
||||
}else if(chunk.getTag() == Tag.CONCAT) {
|
||||
sb.append(chunk.getValue());
|
||||
delete.add(chunk);
|
||||
}
|
||||
|
@ -52,47 +51,12 @@ public class RecordChunk {
|
|||
this.value = sb.toString();
|
||||
}
|
||||
|
||||
public Optional<String> findFirstValue(String tag) {
|
||||
return findFirst(tag)
|
||||
.map(record -> record.getValue());
|
||||
public Optional<RecordChunk> findChunk(Predicate<RecordChunk> predicate) {
|
||||
return this.getSubRecords().stream()
|
||||
.filter(predicate)
|
||||
.findAny();
|
||||
}
|
||||
|
||||
public Optional<RecordChunk> findFirst(String tag) {
|
||||
return findIndexed(tag);
|
||||
}
|
||||
|
||||
private Optional<RecordChunk> findIndexed(String tag) {
|
||||
if(this.recordIndex == null) {
|
||||
this.recordIndex = new HashMap<>();
|
||||
}
|
||||
if(this.recordIndex.containsKey(tag)) {
|
||||
return Optional.of(this.recordIndex.get(tag));
|
||||
}
|
||||
return searchRecord(tag).map(record -> putIndex(tag, record));
|
||||
}
|
||||
|
||||
private Optional<RecordChunk> searchRecord(String tag) {
|
||||
String[] pathTokens = tag.split("\\.");
|
||||
|
||||
RecordChunk lastRecord = this;
|
||||
|
||||
for(String token : pathTokens) {
|
||||
for(RecordChunk record : lastRecord.subRecords) {
|
||||
if(record.getTag().equals(token)) {
|
||||
lastRecord = record;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(!lastRecord.getTag().equals(pathTokens[pathTokens.length - 1])) {
|
||||
lastRecord = null;
|
||||
}
|
||||
return Optional.ofNullable(lastRecord);
|
||||
}
|
||||
|
||||
private RecordChunk putIndex(String tag, RecordChunk record) {
|
||||
this.recordIndex.put(tag, record);
|
||||
return record;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
|
|
@ -4,12 +4,17 @@ import de.nth.chronicle.gedcom.exception.GedcomException;
|
|||
import de.nth.chronicle.gedcom.type.Address;
|
||||
import de.nth.chronicle.gedcom.type.Header;
|
||||
|
||||
import java.util.function.Function;
|
||||
|
||||
public interface RecordParser<T> {
|
||||
|
||||
public T parse(RecordChunk chunk) throws GedcomException;
|
||||
|
||||
public static RecordParser<String> StringParser = chunk -> chunk.getValue();
|
||||
|
||||
public static RecordParser<Header> HEADER = new HeaderParser();
|
||||
public static RecordParser<Address> ADDRESS = new AddressParser();
|
||||
public static RecordParser<String> stringParser() { return StringParser; }
|
||||
public static <T> RecordParser<T> typedParser(Function<String, T> fn) {
|
||||
return chunk -> fn.apply(chunk.getValue());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
package de.nth.chronicle.gedcom.parser;
|
||||
|
||||
import de.nth.chronicle.gedcom.type.SourceSystemData;
|
||||
|
||||
public class SourceSystemDataParser extends AbstractRecordParser<SourceSystemData> {
|
||||
|
||||
private SourceSystemData data = new SourceSystemData();
|
||||
|
||||
public SourceSystemDataParser() {
|
||||
map(Tag.SOURCE_DATA, RecordParser::stringParser, this.data::setName);
|
||||
map(Tag.COPYRIGHT, RecordParser::stringParser, this.data::setCopyright);
|
||||
map(Tag.DATE, DateParser::new, this.data::setPublicationDateTime);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SourceSystemData getTargetElement() {
|
||||
return this.data;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
package de.nth.chronicle.gedcom.parser;
|
||||
|
||||
import de.nth.chronicle.gedcom.type.SourceSystem;
|
||||
|
||||
public class SourceSystemParser extends AbstractRecordParser<SourceSystem>{
|
||||
|
||||
private SourceSystem sourceSystem = new SourceSystem();
|
||||
|
||||
public SourceSystemParser() {
|
||||
map(Tag.SOURCE, RecordParser::stringParser, this.sourceSystem::setSystemId);
|
||||
map(Tag.VERSION, RecordParser::stringParser, this.sourceSystem::setVersion);
|
||||
map(Tag.NAME, RecordParser::stringParser, this.sourceSystem::setProductName);
|
||||
map(Tag.CORPORATION, CorporationParser::new, this.sourceSystem::setCorporation);
|
||||
map(Tag.SOURCE_DATA, SourceSystemDataParser::new, this.sourceSystem::setData);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SourceSystem getTargetElement() {
|
||||
return this.sourceSystem;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
package de.nth.chronicle.gedcom.parser;
|
||||
|
||||
|
||||
/**
 * GEDCOM line tags known to the parser, each bound to the tag text as it
 * appears in a GEDCOM file. Text that matches no constant resolves to
 * {@link #UNKNOWN}.
 */
public enum Tag {

    ADDRESS("ADDR"),
    ADDRESS_CITY("CITY"),
    ADDRESS_COUNTRY("CTRY"),
    ADDRESS_LINE_1("ADR1"),
    ADDRESS_LINE_2("ADR2"),
    ADDRESS_LINE_3("ADR3"),
    ADDRESS_POSTCODE("POST"),
    ADDRESS_STATE("STAE"),
    CONCAT("CONC"),
    CONTINUE("CONT"),
    COPYRIGHT("COPR"),
    CORPORATION("CORP"),
    DATE("DATE"),
    DESTINATION("DEST"),
    EMAIL_ADDRESS("EMAIL"),
    ENCODING("CHAR"),
    FAX_NUMBER("FAX"),
    FILENAME("FILE"),
    FORM("FORM"),
    GEDCOM_VERSION("GEDC"),
    HEAD("HEAD"),
    LANGUAGE("LANG"),
    NAME("NAME"),
    NOTE("NOTE"),
    // The GEDCOM tag for phone numbers is PHON; "PHONE" never matches a valid file.
    PHONE_NUMBER("PHON"),
    PLACE("PLAC"),
    PLACE_LATITUDE("LATI"),
    PLACE_LONGITUDE("LONG"),
    PLACE_MAP("MAP"),
    PLACE_PHONETIC("FONE"),
    PLACE_ROMANISED("ROMN"),

    SOURCE("SOUR"),
    SOURCE_DATA("DATA"),
    TIME("TIME"),
    TRANSCRIPT_METHOD("TYPE"),
    UNKNOWN(""),
    VERSION("VERS"),
    WEB_PAGE("WWW"),

    WEB_URL("URL");

    private final String text;

    Tag(String text) {
        this.text = text;
    }

    /**
     * @return the tag text this constant is bound to
     */
    public String getText() {
        return this.text;
    }

    /**
     * Checks whether the given text is exactly this constant's tag text.
     *
     * @param input text to compare; {@code null} never matches
     * @return {@code true} if {@code input} equals the tag text (case-sensitive)
     */
    public boolean compareText(String input) {
        return this.text.equals(input);
    }

    /**
     * Resolves tag text to its constant.
     *
     * @param text tag text as read from a file; may be {@code null}
     * @return the constant whose text equals {@code text}, or {@link #UNKNOWN}
     *         when there is none
     */
    public static Tag forText(String text) {
        for (Tag tag : values()) {
            if (tag.compareText(text)) {
                return tag;
            }
        }
        return UNKNOWN;
    }
}
|
|
@ -79,7 +79,8 @@ class Tokenizer {
|
|||
private RecordChunk parseRecordChunk() {
|
||||
Matcher matcher = matchLine(this.currentLine);
|
||||
if(!matcher.matches()) {
|
||||
throw new InvalidGedcomException.InvalidLine(this.currentLineNumber, this.currentLine);
|
||||
// TODO throw new InvalidGedcomException.InvalidLine(this.currentLineNumber, this.currentLine);
|
||||
return null;
|
||||
}
|
||||
|
||||
int level = Integer.parseInt(matcher.group(1));
|
||||
|
@ -88,7 +89,7 @@ class Tokenizer {
|
|||
|
||||
RecordChunk record = RecordChunk.builder()
|
||||
.level(level)
|
||||
.tag(tag)
|
||||
.tag(Tag.forText(tag))
|
||||
.value(value)
|
||||
.lineNumber(this.currentLineNumber)
|
||||
.sourceLine(this.currentLine)
|
||||
|
@ -98,6 +99,9 @@ class Tokenizer {
|
|||
}
|
||||
|
||||
private void pushRecordChunk(RecordChunk record) {
|
||||
|
||||
if(record == null) return;
|
||||
|
||||
if(this.stack.isEmpty()) {
|
||||
this.stack.push(record);
|
||||
this.records.add(record);
|
||||
|
|
|
@ -6,7 +6,6 @@ import lombok.Data;
|
|||
import java.util.List;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
public class Address {
|
||||
|
||||
private String line1; /* ADR1 <ADDRESS_LINE1> */
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
package de.nth.chronicle.gedcom.type;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Data
|
||||
public class Corporation {
|
||||
|
||||
private String businessName;
|
||||
private Address address;
|
||||
private List<String> phoneNumbers;
|
||||
private List<String> faxNumbers;
|
||||
private List<String> emails;
|
||||
private List<String> webPages;
|
||||
|
||||
}
|
|
@ -1,17 +1,17 @@
|
|||
package de.nth.chronicle.gedcom.type;
|
||||
|
||||
public enum GedcomFormType {
|
||||
public enum FormType {
|
||||
|
||||
LINEAGE_LINKED("LINEAGE-LINKED");
|
||||
|
||||
private String key;
|
||||
GedcomFormType(String key) { this.key = key; }
|
||||
FormType(String key) { this.key = key; }
|
||||
|
||||
public String getKey() { return this.key; }
|
||||
|
||||
public static GedcomFormType forKey(String key) {
|
||||
public static FormType forKey(String key) {
|
||||
|
||||
for(GedcomFormType type : values()) {
|
||||
for(FormType type : values()) {
|
||||
if(type.getKey().equals(key)) {
|
||||
return type;
|
||||
}
|
|
@ -1,51 +1,31 @@
|
|||
package de.nth.chronicle.gedcom.type;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalTime;
|
||||
import java.nio.charset.Charset;
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class Header {
|
||||
|
||||
private String encoding; /* CHAR <CHARACTER_SET> */
|
||||
private Charset encoding; /* CHAR <CHARACTER_SET> */
|
||||
private String contentDescription; /* NOTE <GEDCOM_CONTENT_DESCRIPTION> */
|
||||
private String language; /* LANG <LANGUAGE_OF_TEXT> */
|
||||
private String copyright; /* COPR <COPYRIGHT_GEDCOM_FILE> */
|
||||
private String fileName; /* FILE <FILE_NAME> */
|
||||
private LocalDate transmissionDate; /* DATE <TRANSMISSION_DATE> */
|
||||
private LocalTime transmissionTime; /* TIME <TIME_VALUE> */
|
||||
private LocalDateTime transmissionDateTime;
|
||||
private String destination; /* DEST <RECEIVING_SYSTEM_NAME> */
|
||||
|
||||
private Meta gedcomMeta; /* GEDC */
|
||||
private Source source; /* SOUR */
|
||||
private HeaderVersion gedcomVersion; /* GEDC */
|
||||
private SourceSystem sourceSystem; /* SOUR */
|
||||
private Place place; /* PLAC */
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
public static class Meta {
|
||||
|
||||
private String version; /* VERS <VERSION_NUMBER> */
|
||||
private GedcomFormType formType; /* FORM <GEDCOM_FORM> */
|
||||
private String formVersion; /* */
|
||||
|
||||
}
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
public static class Source {
|
||||
|
||||
private String systemId; /* SOUR <APPROVED_SYSTEM_ID> */
|
||||
private String version; /* VERS <VERSION_NUMBER> */
|
||||
private String productName; /* NAME <NAME_OF_PRODUCT> */
|
||||
private String businessName; /* CORP <NAME_OF_BUSINESS> */
|
||||
private Address address; /* ADDR <ADDRESS_STRUCTURE> */
|
||||
private String name; /* DATA <NAME_OF_SOURCE_DATA> */
|
||||
private LocalDate publicationDate; /* DATE <PUBLICATION_DATE> */
|
||||
private LocalTime publicationTime; /* TIME <PUBLICATION_TIME> */
|
||||
private String copyright; /* COPR <COPYRIGHT_SOURCE_DATA> */
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
package de.nth.chronicle.gedcom.type;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class HeaderVersion {
|
||||
|
||||
private String version; /* VERS <VERSION_NUMBER> */
|
||||
private FormType formType; /* FORM <GEDCOM_FORM> */
|
||||
private String formVersion; /* */
|
||||
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
package de.nth.chronicle.gedcom.type;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@Data
|
||||
public class Place {
|
||||
|
||||
@Data
|
||||
public static class Transcription {
|
||||
|
||||
private String text;
|
||||
private String method;
|
||||
|
||||
}
|
||||
|
||||
@Data
|
||||
public static class Location {
|
||||
|
||||
private String latitude;
|
||||
private String longitude;
|
||||
|
||||
}
|
||||
|
||||
private String name;
|
||||
|
||||
private List<Transcription> phonetic = new ArrayList<>();
|
||||
private List<Transcription> romanised = new ArrayList<>();
|
||||
|
||||
private Location location;
|
||||
|
||||
private String notes;
|
||||
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
package de.nth.chronicle.gedcom.type;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalTime;
|
||||
|
||||
@Data
|
||||
public class SourceSystem {
|
||||
|
||||
private String systemId; /* SOUR <APPROVED_SYSTEM_ID> */
|
||||
private String version; /* VERS <VERSION_NUMBER> */
|
||||
private String productName; /* NAME <NAME_OF_PRODUCT> */
|
||||
|
||||
private Corporation corporation; /* CORP <NAME_OF_BUSINESS> */
|
||||
private SourceSystemData data;
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
package de.nth.chronicle.gedcom.type;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.LocalTime;
|
||||
|
||||
@Data
|
||||
public class SourceSystemData {
|
||||
|
||||
private String name; /* DATA <NAME_OF_SOURCE_DATA> */
|
||||
private LocalDateTime publicationDateTime; /* DATE <PUBLICATION_DATE> */
|
||||
private String copyright; /* COPR <COPYRIGHT_SOURCE_DATA> */
|
||||
|
||||
}
|
|
@ -16,7 +16,20 @@ public class GedcomReaderTests {
|
|||
|
||||
Gedcom gedcom = reader.read(stream);
|
||||
|
||||
System.out.println(gedcom);
|
||||
System.out.println(gedcom.getHeader().getSourceSystem().getCorporation());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
void testTortureGedcom() {
|
||||
|
||||
InputStream stream = GedcomReaderTests.class.getResourceAsStream("/examples/GEDCOM_UTF-8.ged");
|
||||
|
||||
GedcomReader reader = new GedcomReader();
|
||||
|
||||
Gedcom gedcom = reader.read(stream);
|
||||
|
||||
System.out.println(gedcom.getHeader());
|
||||
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue