REFC renamed RecordToken -> RecordChunk
parent
c8e50a410b
commit
e79d6f6236
|
@ -7,6 +7,6 @@ import lombok.Data;
|
|||
@Data
|
||||
public class Gedcom {
|
||||
|
||||
private final GedcomHeader header;
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -1,41 +0,0 @@
|
|||
package de.nth.chronicle.gedcom;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.Getter;
|
||||
import lombok.NonNull;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalTime;
|
||||
|
||||
@Data
|
||||
@Builder(toBuilder = true)
|
||||
public class GedcomHeader {
|
||||
|
||||
private final String versionNumber;
|
||||
@NonNull
|
||||
private final String gedcomForm;
|
||||
@NonNull
|
||||
private final String gedcomFormVersion;
|
||||
@NonNull
|
||||
private final String characterSet;
|
||||
|
||||
//SOUR
|
||||
private final String approvedSystemId;
|
||||
private final String sourceVersion;
|
||||
private final String nameOfProduct;
|
||||
private final String nameOfBusiness;
|
||||
private final String nameOfSourceData;
|
||||
private final LocalDate publicationDate;
|
||||
private final String copyrightSourceData;
|
||||
|
||||
private final String receivingSystemName;
|
||||
private final LocalDate transmissionDate;
|
||||
private final LocalTime time;
|
||||
private final String fileName;
|
||||
private final String copyrightGedcomFile;
|
||||
private final String language;
|
||||
private final String contentDescription;
|
||||
|
||||
|
||||
}
|
|
@ -1,26 +0,0 @@
|
|||
package de.nth.chronicle.gedcom;
|
||||
|
||||
/**
 * GEDCOM specification versions known to this library.
 *
 * <p>Each constant carries the textual version number it stands for; use
 * {@link #forVersionString(String)} to map such a text back to its constant.
 */
public enum GedcomVersion {

    VERSION_5_5_5("5.5.5");

    /** Textual version number of this constant; enum state must never change, hence {@code final}. */
    private final String version;

    GedcomVersion(String version) {
        this.version = version;
    }

    /**
     * Returns the textual version number of this constant.
     *
     * @return the version string, e.g. {@code "5.5.5"}
     */
    public String getVersion() {
        return this.version;
    }

    /**
     * Looks up the constant whose version string equals the given text exactly.
     *
     * @param version the version text to look up; {@code null} matches no constant
     * @return the matching constant
     * @throws IllegalArgumentException if no constant has the given version string
     */
    public static GedcomVersion forVersionString(String version) {
        for (GedcomVersion candidate : values()) {
            // candidate.version is never null, so this is also safe for a null argument.
            if (candidate.version.equals(version)) {
                return candidate;
            }
        }
        throw new IllegalArgumentException(String.format("No Gedcom Version '%s' found!", version));
    }

}
|
|
@ -1,36 +0,0 @@
|
|||
package de.nth.chronicle.gedcom.parser;
|
||||
|
||||
import de.nth.chronicle.gedcom.Gedcom;
|
||||
import de.nth.chronicle.gedcom.parser.records.HeaderRecordParser;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class Gedcom555Parser implements GedcomParser {
|
||||
|
||||
private static final Map<String, GedcomRecordParser> RECORD_PARSER_MAP = new HashMap<>();
|
||||
static {
|
||||
RECORD_PARSER_MAP.put("HEAD", new HeaderRecordParser());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Gedcom parseGedcom(InputStream stream) throws Exception {
|
||||
|
||||
GedcomTokenizer tokenizer = new GedcomTokenizer(stream);
|
||||
|
||||
List<RecordToken> tokens = tokenizer.parseRecordsTokens();
|
||||
Gedcom.GedcomBuilder builder = Gedcom.builder();
|
||||
|
||||
for(RecordToken token : tokens) {
|
||||
if(RECORD_PARSER_MAP.containsKey(token.getTag())) {
|
||||
RECORD_PARSER_MAP.get(token.getTag()).parse(token, builder);
|
||||
}else {
|
||||
System.err.println("No Parser found for tag " + token.getTag());
|
||||
}
|
||||
}
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
}
|
|
@ -1,20 +0,0 @@
|
|||
package de.nth.chronicle.gedcom.parser;
|
||||
|
||||
import de.nth.chronicle.gedcom.Gedcom;
|
||||
import de.nth.chronicle.gedcom.GedcomVersion;
|
||||
|
||||
import java.io.InputStream;
|
||||
|
||||
public interface GedcomParser {
|
||||
|
||||
public Gedcom parseGedcom(InputStream stream) throws Exception;
|
||||
|
||||
|
||||
public static GedcomParser getDefault() {
|
||||
return getParser(GedcomVersion.VERSION_5_5_5);
|
||||
}
|
||||
public static GedcomParser getParser(GedcomVersion version) {
|
||||
return new Gedcom555Parser();
|
||||
}
|
||||
|
||||
}
|
|
@ -4,6 +4,6 @@ import de.nth.chronicle.gedcom.Gedcom;
|
|||
|
||||
public interface GedcomRecordParser {
|
||||
|
||||
public void parse(RecordToken token, Gedcom.GedcomBuilder builder);
|
||||
public void parse(RecordChunk token, Gedcom.GedcomBuilder builder);
|
||||
|
||||
}
|
||||
|
|
|
@ -22,8 +22,8 @@ class GedcomTokenizer {
|
|||
private String currentLine;
|
||||
private int currentLineNumber;
|
||||
|
||||
private final List<RecordToken> records = new LinkedList<>();
|
||||
private final Stack<RecordToken> stack = new Stack<>();
|
||||
private final List<RecordChunk> records = new LinkedList<>();
|
||||
private final Stack<RecordChunk> stack = new Stack<>();
|
||||
private final BufferedReader reader;
|
||||
|
||||
public GedcomTokenizer(InputStream stream) {
|
||||
|
@ -45,7 +45,7 @@ class GedcomTokenizer {
|
|||
* ▪ do not trim trailing white space from any GEDCOM line or line value
|
||||
* ▪ do not trim leading white space from any line value
|
||||
* */
|
||||
public List<RecordToken> parseRecordsTokens() throws GedcomException {
|
||||
public List<RecordChunk> parseRecordsChunks() throws GedcomException {
|
||||
|
||||
if(!this.records.isEmpty()) {
|
||||
return this.records;
|
||||
|
@ -57,7 +57,7 @@ class GedcomTokenizer {
|
|||
throw new InvalidLineException(this.currentLineNumber, this.currentLine, "Line is too long!");
|
||||
}
|
||||
|
||||
pushRecordToken(parseRecordToken());
|
||||
pushRecordChunk(parseRecordChunk());
|
||||
this.currentLineNumber++;
|
||||
}
|
||||
|
||||
|
@ -72,7 +72,7 @@ class GedcomTokenizer {
|
|||
}
|
||||
}
|
||||
|
||||
private RecordToken parseRecordToken() {
|
||||
private RecordChunk parseRecordChunk() {
|
||||
Matcher matcher = matchLine(this.currentLine);
|
||||
if(!matcher.matches()) {
|
||||
throw new InvalidGedcomException.InvalidLine(this.currentLineNumber, this.currentLine);
|
||||
|
@ -82,7 +82,7 @@ class GedcomTokenizer {
|
|||
String tag = matcher.group(2);
|
||||
String value = matcher.group(3);
|
||||
|
||||
RecordToken record = RecordToken.builder()
|
||||
RecordChunk record = RecordChunk.builder()
|
||||
.level(level)
|
||||
.tag(tag)
|
||||
.value(value)
|
||||
|
@ -91,7 +91,7 @@ class GedcomTokenizer {
|
|||
return record;
|
||||
}
|
||||
|
||||
private void pushRecordToken(RecordToken record) {
|
||||
private void pushRecordChunk(RecordChunk record) {
|
||||
if(this.stack.isEmpty()) {
|
||||
this.stack.push(record);
|
||||
this.records.add(record);
|
||||
|
|
|
@ -9,27 +9,27 @@ import java.util.*;
|
|||
@Getter
|
||||
@Setter
|
||||
@Builder
|
||||
public class RecordToken {
|
||||
public class RecordChunk {
|
||||
|
||||
private Map<String, RecordToken> recordIndex;
|
||||
private Map<String, RecordChunk> recordIndex;
|
||||
|
||||
private int level;
|
||||
private String tag;
|
||||
private String value;
|
||||
|
||||
@Builder.Default
|
||||
private List<RecordToken> subRecords = new LinkedList<>();
|
||||
private List<RecordChunk> subRecords = new LinkedList<>();
|
||||
|
||||
public Optional<String> findFirstValue(String tag) {
|
||||
return findFirst(tag)
|
||||
.map(record -> record.getValue());
|
||||
}
|
||||
|
||||
public Optional<RecordToken> findFirst(String tag) {
|
||||
public Optional<RecordChunk> findFirst(String tag) {
|
||||
return findIndexed(tag);
|
||||
}
|
||||
|
||||
private Optional<RecordToken> findIndexed(String tag) {
|
||||
private Optional<RecordChunk> findIndexed(String tag) {
|
||||
if(this.recordIndex == null) {
|
||||
this.recordIndex = new HashMap<>();
|
||||
}
|
||||
|
@ -39,13 +39,13 @@ public class RecordToken {
|
|||
return searchRecord(tag).map(record -> putIndex(tag, record));
|
||||
}
|
||||
|
||||
private Optional<RecordToken> searchRecord(String tag) {
|
||||
private Optional<RecordChunk> searchRecord(String tag) {
|
||||
String[] pathTokens = tag.split("\\.");
|
||||
|
||||
RecordToken lastRecord = this;
|
||||
RecordChunk lastRecord = this;
|
||||
|
||||
for(String token : pathTokens) {
|
||||
for(RecordToken record : lastRecord.subRecords) {
|
||||
for(RecordChunk record : lastRecord.subRecords) {
|
||||
if(record.getTag().equals(token)) {
|
||||
lastRecord = record;
|
||||
}
|
||||
|
@ -57,7 +57,7 @@ public class RecordToken {
|
|||
return Optional.ofNullable(lastRecord);
|
||||
}
|
||||
|
||||
private RecordToken putIndex(String tag, RecordToken record) {
|
||||
private RecordChunk putIndex(String tag, RecordChunk record) {
|
||||
this.recordIndex.put(tag, record);
|
||||
return record;
|
||||
}
|
|
@ -1,54 +0,0 @@
|
|||
package de.nth.chronicle.gedcom.parser.records;
|
||||
|
||||
import de.nth.chronicle.gedcom.Gedcom;
|
||||
import de.nth.chronicle.gedcom.GedcomHeader;
|
||||
import de.nth.chronicle.gedcom.parser.GedcomRecordParser;
|
||||
import de.nth.chronicle.gedcom.parser.InvalidGedcomException;
|
||||
import de.nth.chronicle.gedcom.parser.RecordToken;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class HeaderRecordParser implements GedcomRecordParser {
|
||||
|
||||
public static final Pattern VERSION_REGEX = Pattern.compile("(\\p{Digit}{1,3})\\.(\\p{Digit}{1,3})(?:\\.(\\p{Digit}{1,3}))?");
|
||||
|
||||
@Override
|
||||
public void parse(RecordToken token, Gedcom.GedcomBuilder builder) {
|
||||
|
||||
System.out.println("Header: " + token.getSubRecords());
|
||||
|
||||
GedcomHeader header = GedcomHeader.builder()
|
||||
.characterSet(token.findFirstValue("CHAR")
|
||||
.orElseThrow(() -> new InvalidGedcomException.InvalidOrMissingRecord("HEAD.CHAR")))
|
||||
.versionNumber(token.findFirstValue("GEDC.VERS")
|
||||
.map(this::validateVersion)
|
||||
.orElseThrow(() -> new InvalidGedcomException.InvalidOrMissingRecord("GEDC.VERS")))
|
||||
.gedcomForm(token.findFirstValue("GEDC.FORM")
|
||||
.orElse(null))
|
||||
.gedcomFormVersion(token.findFirstValue("GEDC.FORM.VERS")
|
||||
.map(this::validateVersion)
|
||||
.orElseThrow(() -> new InvalidGedcomException.InvalidOrMissingRecord("GEDC.VERS")))
|
||||
.approvedSystemId(token.findFirstValue("SOUR").orElse(null))
|
||||
.sourceVersion(token.findFirstValue("SOUR.VERS").orElse(null))
|
||||
.nameOfProduct(token.findFirstValue("SOUR.NAME").orElse(null))
|
||||
.nameOfBusiness(token.findFirstValue("SOUR.CORP").orElse(null)) //TODO address
|
||||
.nameOfSourceData(token.findFirstValue("SOUR.DATA.").orElse(null))
|
||||
.publicationDate(token.findFirstValue("SOUR.DATA.DATE").map(LocalDate::parse).orElse(null))
|
||||
//.copyrightSourceData() TODO
|
||||
|
||||
.build();
|
||||
|
||||
builder.header(header);
|
||||
|
||||
}
|
||||
|
||||
private String validateVersion(String version) {
|
||||
if(VERSION_REGEX.matcher(version).matches()) return version;
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -1,20 +0,0 @@
|
|||
package de.nth.chronicle.gedcom.parser;
|
||||
|
||||
import de.nth.chronicle.gedcom.Gedcom;
|
||||
import de.nth.chronicle.gedcom.GedcomVersion;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class GedcomParserTests {
|
||||
|
||||
@Test
|
||||
void testParserMinimal() throws Exception {
|
||||
|
||||
GedcomParser parser = GedcomParser.getParser(GedcomVersion.VERSION_5_5_5);
|
||||
|
||||
Gedcom gedcom = parser.parseGedcom(GedcomParserTests.class.getResourceAsStream("/examples/555SAMPLE.ged"));
|
||||
|
||||
System.out.println(gedcom);
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -53,10 +53,10 @@ public class GedcomTokenizerTests {
|
|||
|
||||
GedcomTokenizer tokenizer = new GedcomTokenizer(stream);
|
||||
|
||||
List<RecordToken> records = null;
|
||||
List<RecordChunk> records = null;
|
||||
|
||||
try {
|
||||
records = tokenizer.parseRecordsTokens();
|
||||
records = tokenizer.parseRecordsChunks();
|
||||
}catch(Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue