REFC renamed RecordToken -> RecordChunk

master
Niclas Thobaben 2020-11-27 12:56:12 +01:00
parent c8e50a410b
commit e79d6f6236
11 changed files with 20 additions and 217 deletions

View File

@ -7,6 +7,6 @@ import lombok.Data;
/**
 * Root object of a parsed GEDCOM file.
 *
 * NOTE(review): other code in this project calls {@code Gedcom.builder()} and
 * {@code Gedcom.GedcomBuilder}; the annotation that generates the builder is not
 * visible in this excerpt - confirm it is still present on this class.
 */
@Data
public class Gedcom {
// Parsed content of the HEAD record; set through the builder by HeaderRecordParser.
private final GedcomHeader header;
}

View File

@ -1,41 +0,0 @@
package de.nth.chronicle.gedcom;
import lombok.Builder;
import lombok.Data;
import lombok.Getter;
import lombok.NonNull;
import java.time.LocalDate;
import java.time.LocalTime;
/**
 * Value object for the content of a GEDCOM header (HEAD) record.
 *
 * All fields are final; instances are created through the Lombok-generated builder
 * ({@code GedcomHeader.builder()} / {@code toBuilder()}). Fields annotated with
 * {@code @NonNull} are null-checked by Lombok when the object is constructed.
 *
 * The tag paths mentioned on the fields below are the ones HeaderRecordParser reads,
 * relative to the HEAD record. Fields without a tag path are not populated by any
 * parser visible alongside this class.
 */
@Data
@Builder(toBuilder = true)
public class GedcomHeader {
// GEDC.VERS - required by HeaderRecordParser, although not marked @NonNull here.
private final String versionNumber;
// GEDC.FORM
@NonNull
private final String gedcomForm;
// GEDC.FORM.VERS
@NonNull
private final String gedcomFormVersion;
// CHAR
@NonNull
private final String characterSet;
// --- SOUR: information about the system that produced the file ---
// SOUR
private final String approvedSystemId;
// SOUR.VERS
private final String sourceVersion;
// SOUR.NAME
private final String nameOfProduct;
// SOUR.CORP
private final String nameOfBusiness;
// SOUR.DATA
private final String nameOfSourceData;
// SOUR.DATA.DATE
private final LocalDate publicationDate;
// Not yet populated by HeaderRecordParser (marked TODO there).
private final String copyrightSourceData;
// --- Remaining header fields; presumably filled from other HEAD sub-records - TODO confirm tags ---
private final String receivingSystemName;
private final LocalDate transmissionDate;
private final LocalTime time;
private final String fileName;
private final String copyrightGedcomFile;
private final String language;
private final String contentDescription;
}

View File

@ -1,26 +0,0 @@
package de.nth.chronicle.gedcom;
/**
 * GEDCOM specification versions known to this library.
 */
public enum GedcomVersion {

    VERSION_5_5_5("5.5.5");

    /** Version string exactly as it is compared in {@link #forVersionString(String)}. */
    private final String version;

    GedcomVersion(String version) {
        this.version = version;
    }

    /**
     * @return the version string of this constant, e.g. {@code "5.5.5"}
     */
    public String getVersion() {
        return this.version;
    }

    /**
     * Looks up the constant whose version string equals the given text.
     *
     * @param version version string to look up; compared with {@code equals}, so it
     *                must match exactly ({@code null} never matches)
     * @return the matching constant
     * @throws IllegalArgumentException if no constant has that version string
     */
    public static GedcomVersion forVersionString(String version) {
        for (GedcomVersion candidate : values()) {
            if (candidate.version.equals(version)) {
                return candidate;
            }
        }
        throw new IllegalArgumentException(String.format("No Gedcom Version '%s' found!", version));
    }
}

View File

@ -1,36 +0,0 @@
package de.nth.chronicle.gedcom.parser;
import de.nth.chronicle.gedcom.Gedcom;
import de.nth.chronicle.gedcom.parser.records.HeaderRecordParser;
import java.io.InputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * {@link GedcomParser} implementation that tokenizes the stream and hands every
 * returned record to the record parser registered for its tag.
 */
public class Gedcom555Parser implements GedcomParser {

    /** Record parsers keyed by the tag of the record they handle. */
    private static final Map<String, GedcomRecordParser> RECORD_PARSER_MAP = new HashMap<>();

    static {
        RECORD_PARSER_MAP.put("HEAD", new HeaderRecordParser());
    }

    /**
     * Tokenizes the stream and builds a {@link Gedcom} from the records found.
     * Records whose tag has no registered parser are reported on {@code System.err}
     * and otherwise ignored.
     *
     * @param stream source of the GEDCOM data; it is not closed by this method
     * @return the assembled {@link Gedcom}
     * @throws Exception whatever the tokenizer or a record parser throws
     */
    @Override
    public Gedcom parseGedcom(InputStream stream) throws Exception {
        List<RecordToken> parsedRecords = new GedcomTokenizer(stream).parseRecordsTokens();
        Gedcom.GedcomBuilder gedcomBuilder = Gedcom.builder();

        for (RecordToken current : parsedRecords) {
            GedcomRecordParser handler = RECORD_PARSER_MAP.get(current.getTag());
            if (handler == null) {
                System.err.println("No Parser found for tag " + current.getTag());
                continue;
            }
            handler.parse(current, gedcomBuilder);
        }

        return gedcomBuilder.build();
    }
}

View File

@ -1,20 +0,0 @@
package de.nth.chronicle.gedcom.parser;
import de.nth.chronicle.gedcom.Gedcom;
import de.nth.chronicle.gedcom.GedcomVersion;
import java.io.InputStream;
/**
 * Reads GEDCOM data from a stream and also serves as the factory for parser
 * implementations.
 */
public interface GedcomParser {

    /**
     * Parses the GEDCOM data supplied by the stream.
     *
     * @param stream source of the GEDCOM data
     * @return the parsed {@link Gedcom}
     * @throws Exception if the implementation fails to read or parse the data
     */
    Gedcom parseGedcom(InputStream stream) throws Exception;

    /**
     * @return the parser for {@link GedcomVersion#VERSION_5_5_5}
     */
    static GedcomParser getDefault() {
        return getParser(GedcomVersion.VERSION_5_5_5);
    }

    /**
     * Creates a parser for the requested version.
     *
     * @param version requested GEDCOM version; currently not inspected, every call
     *                yields a new {@link Gedcom555Parser}
     * @return a new parser instance
     */
    static GedcomParser getParser(GedcomVersion version) {
        return new Gedcom555Parser();
    }
}

View File

@ -4,6 +4,6 @@ import de.nth.chronicle.gedcom.Gedcom;
public interface GedcomRecordParser {
public void parse(RecordToken token, Gedcom.GedcomBuilder builder);
public void parse(RecordChunk token, Gedcom.GedcomBuilder builder);
}

View File

@ -22,8 +22,8 @@ class GedcomTokenizer {
private String currentLine;
private int currentLineNumber;
private final List<RecordToken> records = new LinkedList<>();
private final Stack<RecordToken> stack = new Stack<>();
private final List<RecordChunk> records = new LinkedList<>();
private final Stack<RecordChunk> stack = new Stack<>();
private final BufferedReader reader;
public GedcomTokenizer(InputStream stream) {
@ -45,7 +45,7 @@ class GedcomTokenizer {
* do not trim trailing white space from any GEDCOM line or line value
* do not trim leading white space from any line value
* */
public List<RecordToken> parseRecordsTokens() throws GedcomException {
public List<RecordChunk> parseRecordsChunks() throws GedcomException {
if(!this.records.isEmpty()) {
return this.records;
@ -57,7 +57,7 @@ class GedcomTokenizer {
throw new InvalidLineException(this.currentLineNumber, this.currentLine, "Line is too long!");
}
pushRecordToken(parseRecordToken());
pushRecordChunk(parseRecordChunk());
this.currentLineNumber++;
}
@ -72,7 +72,7 @@ class GedcomTokenizer {
}
}
private RecordToken parseRecordToken() {
private RecordChunk parseRecordChunk() {
Matcher matcher = matchLine(this.currentLine);
if(!matcher.matches()) {
throw new InvalidGedcomException.InvalidLine(this.currentLineNumber, this.currentLine);
@ -82,7 +82,7 @@ class GedcomTokenizer {
String tag = matcher.group(2);
String value = matcher.group(3);
RecordToken record = RecordToken.builder()
RecordChunk record = RecordChunk.builder()
.level(level)
.tag(tag)
.value(value)
@ -91,7 +91,7 @@ class GedcomTokenizer {
return record;
}
private void pushRecordToken(RecordToken record) {
private void pushRecordChunk(RecordChunk record) {
if(this.stack.isEmpty()) {
this.stack.push(record);
this.records.add(record);

View File

@ -9,27 +9,27 @@ import java.util.*;
@Getter
@Setter
@Builder
public class RecordToken {
public class RecordChunk {
private Map<String, RecordToken> recordIndex;
private Map<String, RecordChunk> recordIndex;
private int level;
private String tag;
private String value;
@Builder.Default
private List<RecordToken> subRecords = new LinkedList<>();
private List<RecordChunk> subRecords = new LinkedList<>();
public Optional<String> findFirstValue(String tag) {
return findFirst(tag)
.map(record -> record.getValue());
}
public Optional<RecordToken> findFirst(String tag) {
public Optional<RecordChunk> findFirst(String tag) {
return findIndexed(tag);
}
private Optional<RecordToken> findIndexed(String tag) {
private Optional<RecordChunk> findIndexed(String tag) {
if(this.recordIndex == null) {
this.recordIndex = new HashMap<>();
}
@ -39,13 +39,13 @@ public class RecordToken {
return searchRecord(tag).map(record -> putIndex(tag, record));
}
private Optional<RecordToken> searchRecord(String tag) {
private Optional<RecordChunk> searchRecord(String tag) {
String[] pathTokens = tag.split("\\.");
RecordToken lastRecord = this;
RecordChunk lastRecord = this;
for(String token : pathTokens) {
for(RecordToken record : lastRecord.subRecords) {
for(RecordChunk record : lastRecord.subRecords) {
if(record.getTag().equals(token)) {
lastRecord = record;
}
@ -57,7 +57,7 @@ public class RecordToken {
return Optional.ofNullable(lastRecord);
}
private RecordToken putIndex(String tag, RecordToken record) {
private RecordChunk putIndex(String tag, RecordChunk record) {
this.recordIndex.put(tag, record);
return record;
}

View File

@ -1,54 +0,0 @@
package de.nth.chronicle.gedcom.parser.records;
import de.nth.chronicle.gedcom.Gedcom;
import de.nth.chronicle.gedcom.GedcomHeader;
import de.nth.chronicle.gedcom.parser.GedcomRecordParser;
import de.nth.chronicle.gedcom.parser.InvalidGedcomException;
import de.nth.chronicle.gedcom.parser.RecordToken;
import java.time.LocalDate;
import java.util.regex.Pattern;
/**
 * Translates a {@code HEAD} record into a {@link GedcomHeader} and stores it on the
 * {@link Gedcom} builder.
 *
 * Required sub-records ({@code CHAR}, {@code GEDC.VERS}, {@code GEDC.FORM},
 * {@code GEDC.FORM.VERS}) raise {@link InvalidGedcomException.InvalidOrMissingRecord}
 * when absent; the {@code SOUR} sub-records are optional and map to {@code null}.
 */
public class HeaderRecordParser implements GedcomRecordParser {

    /** Matches version numbers of the form {@code n.n} or {@code n.n.n}, 1-3 digits per part. */
    public static final Pattern VERSION_REGEX = Pattern.compile("(\\p{Digit}{1,3})\\.(\\p{Digit}{1,3})(?:\\.(\\p{Digit}{1,3}))?");

    /**
     * GEDCOM exact-date layout: day, English three-letter month, year (e.g. "2 Oct 2019"
     * or "2 OCT 2019"). Month matching is case-insensitive.
     */
    private static final java.time.format.DateTimeFormatter GEDCOM_EXACT_DATE =
            new java.time.format.DateTimeFormatterBuilder()
                    .parseCaseInsensitive()
                    .appendPattern("d MMM u")
                    .toFormatter(java.util.Locale.ENGLISH);

    /**
     * Builds the header from the sub-records of the given record.
     *
     * @param token   the {@code HEAD} record
     * @param builder receives the parsed header via {@code builder.header(...)}
     * @throws InvalidGedcomException.InvalidOrMissingRecord if a required sub-record is
     *         missing, or a version value does not match {@link #VERSION_REGEX}
     * @throws java.time.format.DateTimeParseException if {@code SOUR.DATA.DATE} is present
     *         but is neither a GEDCOM exact date nor an ISO-8601 date
     */
    @Override
    public void parse(RecordToken token, Gedcom.GedcomBuilder builder) {
        GedcomHeader header = GedcomHeader.builder()
                .characterSet(token.findFirstValue("CHAR")
                        .orElseThrow(() -> new InvalidGedcomException.InvalidOrMissingRecord("HEAD.CHAR")))
                .versionNumber(token.findFirstValue("GEDC.VERS")
                        .map(this::validateVersion)
                        .orElseThrow(() -> new InvalidGedcomException.InvalidOrMissingRecord("GEDC.VERS")))
                // gedcomForm is @NonNull on GedcomHeader, so report a missing record
                // explicitly instead of letting the builder fail on null.
                .gedcomForm(token.findFirstValue("GEDC.FORM")
                        .orElseThrow(() -> new InvalidGedcomException.InvalidOrMissingRecord("GEDC.FORM")))
                .gedcomFormVersion(token.findFirstValue("GEDC.FORM.VERS")
                        .map(this::validateVersion)
                        .orElseThrow(() -> new InvalidGedcomException.InvalidOrMissingRecord("GEDC.FORM.VERS")))
                .approvedSystemId(token.findFirstValue("SOUR").orElse(null))
                .sourceVersion(token.findFirstValue("SOUR.VERS").orElse(null))
                .nameOfProduct(token.findFirstValue("SOUR.NAME").orElse(null))
                .nameOfBusiness(token.findFirstValue("SOUR.CORP").orElse(null)) //TODO address
                .nameOfSourceData(token.findFirstValue("SOUR.DATA").orElse(null))
                .publicationDate(token.findFirstValue("SOUR.DATA.DATE")
                        .map(HeaderRecordParser::parseExactDate)
                        .orElse(null))
                //.copyrightSourceData() TODO
                .build();

        builder.header(header);
    }

    /**
     * Parses a date value, accepting the GEDCOM exact-date layout first and falling
     * back to ISO-8601 ({@code yyyy-MM-dd}), which is what this parser accepted before.
     */
    private static LocalDate parseExactDate(String value) {
        try {
            return LocalDate.parse(value, GEDCOM_EXACT_DATE);
        } catch (java.time.format.DateTimeParseException notGedcomLayout) {
            // Deliberately retried: if this also fails, the ISO parse error propagates.
            return LocalDate.parse(value);
        }
    }

    /**
     * Returns the version unchanged when it matches {@link #VERSION_REGEX}, otherwise
     * {@code null}. Used inside {@code Optional.map}, where a {@code null} result turns
     * the Optional empty so the caller's {@code orElseThrow} reports the record.
     */
    private String validateVersion(String version) {
        if (VERSION_REGEX.matcher(version).matches()) {
            return version;
        }
        return null;
    }
}

View File

@ -1,20 +0,0 @@
package de.nth.chronicle.gedcom.parser;
import de.nth.chronicle.gedcom.Gedcom;
import de.nth.chronicle.gedcom.GedcomVersion;
import org.junit.jupiter.api.Test;
public class GedcomParserTests {
@Test
void testParserMinimal() throws Exception {
GedcomParser parser = GedcomParser.getParser(GedcomVersion.VERSION_5_5_5);
Gedcom gedcom = parser.parseGedcom(GedcomParserTests.class.getResourceAsStream("/examples/555SAMPLE.ged"));
System.out.println(gedcom);
}
}

View File

@ -53,10 +53,10 @@ public class GedcomTokenizerTests {
GedcomTokenizer tokenizer = new GedcomTokenizer(stream);
List<RecordToken> records = null;
List<RecordChunk> records = null;
try {
records = tokenizer.parseRecordsTokens();
records = tokenizer.parseRecordsChunks();
}catch(Exception e) {
e.printStackTrace();
}