package nl.wldelft.fews.system.plugin.dataImporttimeseriesparsers;
import nl.wldelft.util.TextUtils;
import nl.wldelft.util.TimeZoneUtils;
import nl.wldelft.util.io.LineReader;
import nl.wldelft.util.io.TextParser;
import nl.wldelft.util.timeseries.DefaultTimeSeriesHeader;
import nl.wldelft.util.timeseries.TimeSeriesContentHandler;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import java.io.IOException;
import java.util.TimeZone;
/**
* This parser supports two formats: WISKI7 and the previous format of WISKI (number unknown)
* Recognizing the formats:
* a) If the header contains the keyword TSPATH, then it is WISKI7.
* The event rows wil be parsed according to the format as specified with the keyword LAYOUT. If no LAYOUT specified, the defaults wil be used.
* b) otherwise it is older Wiski format.
* The event rows wil be parsed as follows: the first column is timestamp, the second column is value
*/
public class WiskiTimeSeriesParser implements TextParser<TimeSeriesContentHandler> {
private static final Logger log = LoggerLogManager.getLogger(WiskiTimeSeriesParser.class);
private static final char remarkSepar = '"';
private boolean anyHeaderInvalid = false;
private LineReader reader = null;
private TimeSeriesContentHandler contentHandler = null;
private DefaultTimeSeriesHeader header = new DefaultTimeSeriesHeader();
private TimeZone headerTimeZone; //timeZone read from the file header
private String virtualFileName;
//Fields <name>Index specify the column of the timestamp, value, status ,.... in the event row.
//With the keyword LAYOUT an another sequence of the data can be specified.
private int timestampIndex;
private int valueIndex;
private int statusIndex;
private int interpolation_typeIndex;
private int remarkIndex;
private boolean defaultWiski7LayoutUsed = true;
@Override
public void parse(LineReader reader, String virtualFileName, TimeSeriesContentHandler contentHandler) throws Exception {
this.virtualFileName = virtualFileName;
this.contentHandler = contentHandler;
this.contentHandler.addMissingValue(-777.0f);
this.reader = reader;
this.reader.setCommentLinePrefix('?');
this.reader.setSkipEmptyLines(true);
this.header.clear();
this.headerTimeZone = null;
//Initialized the columns to read data from the event rows.
initializeWiski7RowLayout();
anyHeaderInvalid = false;
boolean headerValid = false;
reader.mark(500);
String[] buffer = new String[5];
for (String line; (line = reader.readLine()) != null; reader.mark(500)) {
line = line.trim();
if (line.equals("ENDOFFILE")) return;
if (line.charAt(0) == '#') {
reader.reset();
headerValid = parseHeader();
continue;
}
if (this.header.getLocationId() == null && this.header.!headerValid) {
anyHeaderInvalid = true;
continue;
}
if (this.header.getLocationId() == null && this.header.getParameterId() == null)
throw new Exception("Not a valid wiski file, REXCHANGE, CNAME, SANR tags are all missing in the file header");
if (this.contentHandler.isCurrentTimeSeriesHeaderForAllTimesRejected()) continue;
//If default layout used, the remarks (if any) must be specified last in the line and start with "-token
String remarksDefaultLayout;
if (this.defaultWiski7LayoutUsed && line.indexOf(remarkSepar) != -1) {
//extract remarks first (if any), so that they cannot be confused with the status or interp.type
String leftStr = TextUtils.leftFrom(line, remarkSepar);
remarksDefaultLayout = TextUtils.rightFrom(line, remarkSepar).replace(remarkSepar, ' ');
TextUtils.split(leftStr, ' ', buffer);
} else {
remarksDefaultLayout = null;
TextUtils.split(line, ' ', buffer);
}
if (this.headerTimeZone != null) {
contentHandler.setTime(this.headerTimeZone, "yyyyMMddHHmmss", buffer[this.timestampIndex]);
} else {
contentHandler.setTime(contentHandler.getDefaultTimeZone(), "yyyyMMddHHmmss", buffer[this.timestampIndex]);
}
int flag = getFlag(this.defaultWiski7LayoutUsed, this.statusIndex, this.interpolation_typeIndex, buffer, line);
if (flag != Integer.MIN_VALUE) {
contentHandler.setFlag(flag);
}
contentHandler.setValue('.', buffer[this.valueIndex]);
contentHandler.setComment(null); //reset
if (remarksDefaultLayout != null) {
if (!remarksDefaultLayout.isEmpty()) contentHandler.setComment(remarksDefaultLayout);
} else {
if (this.remarkIndex != -1) {
String remark = buffer[this.remarkIndex].replace(remarkSepar, ' ');
if (!remark.isEmpty()) contentHandler.setComment(remark);
}
}
contentHandler.applyCurrentFields();
}
}
/**
* if (anyHeaderInvalid) throw new IOException(" the file has one or more invalid headers"); // throw exception since the file should be marked as not fully successful
}
/**
* Read metadata from the #-records. Metadata block is followed by the timeseries-records
* but the timeseries-records may be also omitted. In this case the Metadata block MUST start
* with a record that begins with ## !
* Empty records wil be ignored.
* <p/>
* The meaning of the keys is:
* TZ : time zone. TZ are UTC0 and UTC+/-x (e.g. UTC+1 or UTC-2).
* TSPATH : /site id/location id/parameter id/ts shortname
* example TSPATH/160/160_1/WATHTE/cmd.p
* only location id and parameter id is parsed and used
* SANR : location id. Used only if not specified with TSPATH
* CNAME: parameter id. Used only if not specified with TSPATH
* CUNIT: unit
* RINVAL: missing value
* REXCHANGE: location-parameter. Wil be used only if the metadata block does not contain keys TSPATH, SANR or CNAME.
* The string specified by keyword REXCHANGE represents location Id and also parameter-id (so locations Id and parameter Id equals)
*
* @throws IOException if the header format is incorrect
*/
private voidboolean parseHeader() throws IOException {
this.header.clear();
this.headerTimeZone = null;
//Initialized the columns to read data from the event rows.
initializeWiski7RowLayout();
String tspathPar = null;
String tspathQual = null;
String tspathLoc = null;
String fallbackParLoc = null;
for (String line; (line = this.reader.readLine()) != null; reader.mark(500)) {
line = line.trim();
if (line.charAt(0) != '#') {
reader.reset();
break;
}
String layoutString = parseKeyValue("LAYOUT", line);
if (layoutString != null) {
defaultWiski7LayoutUsed = false;
//Obtain information how to parse event rows
parseEventRowLayout(layoutString.trim());
}
String tzString = parseKeyValue("|TZ", line);
if (tzString != null) {
this.headerTimeZone = parseTimeZone(tzString, this.virtualFileName, this.contentHandler.getDefaultTimeZone().getID());
}
//Parse location id and parameter specified with keyword TSPATH
//format: TSPATH/<site id>/<station id>/<parameter shortname>/<ts shortname>
//example: TSPATH/160/160_1/WATHTE/cmd.p (contains always all these 4 elements )
//<ts shortname> is read as qualifier
String tspath = parseKeyValue("TSPATH", line);
ifint (tspathindex != nullline.contains("TSPATH/") {
? 1 : 0;
Stringif (tspath != null && !tspath.trim().equals("/")) {
//TSPATH available and not empty
String[] buffer = TextUtils.split(tspath, '/');
if (buffer.length != 4 5+ index || buffer[21 + index].length() < 1 || buffer[32+index].length() < 1) {
throw new IOException("Not a valid wiski file, TSPATH has a incorrect format: " + tspath +
" expected: TSPATH/<site id>/<station id>/<parameter shortname>/<ts shortname>");
}
tspathLoc = buffer[21+index];
tspathPar = buffer[32+index];
tspathQual = buffer[43+index].replace('.', '_'); // dots are not allowed in fews as internal qualifiers, replace dots with underscores
}
String locationId = parseKeyValue("SANR", line);
if (locationId != null && !locationId.isBlank()) header.setLocationId(locationId);
String parameterId = parseKeyValue("CNAME", line);
if (log.isDebugEnabled() && locationId != null && locationId.isBlank()) {
if (parameterId != null) header.setParameterId(parameterIdlog.debug("Location id is blank, skipping current line.");
}
String unitparameterId = parseKeyValue("CUNITCNAME", line);
if (unitparameterId != null) header.setUnit(unit&& !parameterId.isBlank()) header.setParameterId(parameterId);
if String missingValue = parseKeyValue("RINVAL", line);
(log.isDebugEnabled() && parameterId != null && parameterId.isBlank()) {
if (missingValue != null) contentHandler.addMissingValue(missingValue);log.debug("Parameter id is blank, skipping current line.");
}
String parLocunit = parseKeyValue("REXCHANGECUNIT", line);
if (parLocunit != null) fallbackParLoc = parLoc;
header.setUnit(unit);
String missingValue }
= parseKeyValue("RINVAL", line);
if (tspathPar != null &&if tspathLoc(missingValue != null) {contentHandler.addMissingValue(missingValue);
//If par id, qualifier id and loc are specified with TSPATH, use them , even if the keywords SANR and SNAME are also present in the file
String parLoc = parseKeyValue("REXCHANGE", line);
if (parLoc != null) fallbackParLoc = parLoc;
}
if header.setParameterId(tspathPar);
!= null && tspathLoc != null) {
header.setQualifierIds(tspathQual);
//If par id, qualifier header.setLocationId(tspathLoc);
} else {
//The header has an OLD format (no WISKI7), if any LAYOUT specifid, it wil be ignored !
id and loc are specified with TSPATH, use them , even if the keywords SANR and SNAME are also present in the file
header.setParameterId(tspathPar);
header.setQualifierIds(tspathQual);
//Re-initialized the columns to read data from the event rows (no flags reading !).header.setLocationId(tspathLoc);
} initializeRowLayout();else {
if//The (header.getParameterId() == null || header.getLocationId() == null) {
header has an OLD format (no WISKI7), if any LAYOUT specifid, it wil be ignored !
header.setParameterId(fallbackParLoc);
//Re-initialized the columns to read data from the event rows (no flags reading header.setLocationId(fallbackParLoc);!).
}initializeRowLayout();
}
contentHandler.setTimeSeriesHeader(header);
}
private void initializeWiski7RowLayout(if (header.getParameterId() == null || header.getLocationId() == null) {
defaultWiski7LayoutUsed = true;
if timestampIndex = 0; (fallbackParLoc != null && !fallbackParLoc.isEmpty()) {
// timestamp wil be read from column 1 by default
header.setParameterId(fallbackParLoc);
valueIndex = 1; header.setLocationId(fallbackParLoc);
// value wil be read from column 2} byelse default{
statusIndex = 2; log.warn(this.reader.getFileAndLineNumber()+" parameter//location is missing statusnext wilto bethe read from column 3 by defaultkeyword REXCHANGE");
interpolation_typeIndex = 3; // interpolation_type wil be read from column 4 by default
return false;
remarkIndex = 4; }
// remark wil be read}
from column 5 by default
}
private void initializeRowLayout() {
if (log.isDebugEnabled()) log.debug("Reading data for " defaultWiski7LayoutUsed = false+ header.toString());
timestampIndex = 0;if (header.getLocationId() != null || header.getParameterId() != null) contentHandler.setTimeSeriesHeader(header);
return true;
// }
timestamp is always readprivate fromvoid columninitializeWiski7RowLayout() 1{
valueIndexdefaultWiski7LayoutUsed = 1;true;
timestampIndex = 0; // valuetimestamp iswil alwaysbe read from column 1 by 2default
statusIndexvalueIndex = -1; // value statuswil NOTbe read
from column 2 by default
interpolation_typeIndex statusIndex = -1; //2; interpolation_type NOT read
// status wil be read remarkIndexfrom =column -1;3 by default
interpolation_typeIndex = 3; // remark NOTinterpolation_type wil be read
from column 4 by }default
//Some examplesremarkIndex of= the4; buffer, that is parsed from the header line LAYOUT:
//(timestamp,value)
//(timestamp,value,status,interpolationm_type,remark)
private void parseEventRowLayout(String bufferIn) throws IOException { remark wil be read from column 5 by default
}
private void initializeRowLayout() {
//Check the passed buffer, remove bracketsdefaultWiski7LayoutUsed from= bufferfalse;
String buffertimestampIndex = null0;
if (bufferIn.length() > 5)// {
timestamp is always read from column 1
if (bufferIn.charAt(0) == '(' && bufferIn.charAt(bufferIn.length() - 1) == ')') {
valueIndex = 1; // value is bufferalways = bufferIn.substring(1, bufferIn.length() - 1).trim();read from column 2
statusIndex = -1; }
}
// status if (buffer == null)NOT read
interpolation_typeIndex = throw new IOException("String specified with the LAYOUT-keyword has wrong format:" + bufferIn);
-1; // interpolation_type NOT read
remarkIndex = -1; String[] keywords = new String[5]; //max. 5 keywords expectedremark ,NOT asread
specified in the second example}
//Some examples of the TextUtils.split(buffer, ',', keywords);
//-1 = data not specified in the file (accorning to the header keyword LAYOUTthat is parsed from the header line LAYOUT:
//(timestamp,value)
//(timestamp,value,status,interpolationm_type,remark)
private void parseEventRowLayout(String bufferIn) timestampIndexthrows = -1;IOException {
valueIndex//Check = -1;
the passed buffer, remove brackets statusIndex = -1;
from buffer
String interpolation_typeIndexbuffer = -1null;
remarkIndex = -1;
if (bufferIn.length() > 5) {
forif (int i = 0; i < 5; i++bufferIn.charAt(0) == '(' && bufferIn.charAt(bufferIn.length() - 1) == ')') {
String keywordbuffer = keywords[i] bufferIn.substring(1, bufferIn.length() - 1).trim();
if (keyword.isEmpty())}
}
continue; //not all keywords must be specified, however at least 2: timestamp and value
if (keyword.equals("timestamp")) {if (buffer == null)
throw new IOException("String specified with the LAYOUT-keyword has wrong format:" + bufferIn);
String[] keywords = new String[5]; //max. 5 keywords ifexpected (timestampIndex, != -1)
as specified in the second example
TextUtils.split(buffer, ',', keywords);
throw new IOException("Keyword '" + keyword + "' specified more than once in //-1 = data not specified in the file (accorning to the header linekeyword LAYOUT");
timestampIndex = -1;
timestampIndexvalueIndex = i-1;
statusIndex = -1;
} else if (keyword.equals("value")) {
interpolation_typeIndex = -1;
if (valueIndex !remarkIndex = -1);
for (int i = 0; i < 5; throw new IOException("Keyword '" + keyword + "' specified more than once in the header line LAYOUT");
i++) {
String keyword = keywords[i].trim();
if (keyword.isEmpty())
valueIndex = i;
continue; }//not else all keywords must be specified, however at least 2: timestamp and value
if (keyword.equals("statustimestamp")) {
if (statusIndextimestampIndex != -1)
throw new IOException("Keyword '" + keyword + "' specified more than once in the header line LAYOUT");
statusIndextimestampIndex = i;
} else if (keyword.equals("interpolation_typevalue")) {
if (interpolation_typeIndexvalueIndex != -1)
throw new IOException("Keyword '" + keyword + "' specified more than once in the header line LAYOUT");
interpolation_typeIndexvalueIndex = i;
} else if (keyword.equals("remarkstatus") || keyword.equals("primary_status")) {
if (remarkIndexstatusIndex != -1)
throw new IOException("Keyword '" + keyword + "' specified more than once in the header line LAYOUT");
remarkIndexstatusIndex = i;
} else if (keyword.equals("interpolation_type")) {
throw newif IOException("Wrong keyword(interpolation_typeIndex != -1)
throw new IOException("Keyword '" + keyword + "' specified more than once in the header line #LAYOUT LAYOUT");
}
interpolation_typeIndex = }
i;
if (timestampIndex == -1 ||} valueIndexelse == -1if (keyword.equals("remark")) {
throw new IOException("Keywords timestamp and/or value are not specified in the header line LAYOUT !");
if (remarkIndex != -1)
}
}
throw new //Returns value or null if the key not foundIOException("Keyword '" + keyword + "' specified more than once in the buffer
header line LAYOUT");
private static String parseKeyValue(String key, String buffer) {
int keyPosremarkIndex = buffer.indexOf(key)i;
if (keyPos == -1) return null; } else if (isKnownKeyword(keyword)) {
int endValuePos = buffer.indexOf(";*;", keyPos + key.length());
// Ignore
if (endValuePos == -1) endValuePos = buffer.indexOf("|*|", keyPos + key.length()); } else {
if (endValuePos == -1) return null;
throw new IOException("Wrong keyword '" return buffer.substring(keyPos + key.length(), endValuePos);
+ keyword + "' specified in the header line #LAYOUT ");
}
}
//Parse time zone. Note: UTC always expected , since no other code wil occur according to the Wiski 7 format
//Allowed formats are: UTC0 and UTC+/-x (e.g. UTC+1 or UTC-2).
private static TimeZone parseTimeZone(String buffer, String fileName, String defaultTimeZone) throws IOException {
if (buffer.equals("MEZ")) return TimeZone.getTimeZone("GMT+1")if (timestampIndex == -1 || valueIndex == -1) {
throw new IOException("Keywords timestamp and/or value are not specified in the header line LAYOUT !");
}
}
private static boolean isKnownKeyword(String key) {
if (key.equals("timestampoccurence")) return true;
if (key.equals("forecast")) return true;
if (key.equals("member")) return true;
if (key.equals("dispatch_info")) return true;
return false;
}
//Returns value or null if the key not found in the buffer
private static String parseKeyValue(String key, String buffer) {
int keyPos = buffer.indexOf(key);
if (keyPos == -1) return null;
int endValuePos = buffer.indexOf(";*;", keyPos + key.length());
if (endValuePos == -1) endValuePos = buffer.indexOf("|*|", keyPos + key.length());
if (endValuePos == -1) return null;
return buffer.substring(keyPos + key.length(), endValuePos);
}
private static TimeZone parseTimeZone(String buffer, String fileName, String defaultTimeZone) throws IOException {
if (buffer.equals("MEZ")) return TimeZone.getTimeZone("GMT+1");
if (buffer.equals("MESZ")) return TimeZone.getTimeZone("CET");
if (buffer.equals("CET")) return TimeZone.getTimeZone("CET");
if (buffer.equals("CEST")) return TimeZone.getTimeZone("GMT+2");
if (buffer.equals("Europe/Amsterdam") || buffer.equals("Europe/Berlin") || buffer.equals("Europe/Brussels") || buffer.equals("Europe/Luxembourg") ||
buffer.equals("Europe/Madrid") || buffer.equals("Europe/Paris") || buffer.equals("Europe/Rome") || buffer.equals("Europe/Vienna") || buffer.equals("Europe/Zurich")) {
return TimeZone.getTimeZone("CET");
}
String strOffset = getUtcGmtOffset(buffer);
if (strOffset == null) {
log.warn(fileName + ": invalid timezone specified with TZ keyword - " + buffer + " , " + defaultTimeZone + " will be used.");
return null;
}
TimeZone timeZone;
if (buffer.equals("MESZ")) return TimeZone.getTimeZone("CET");
try {
if (buffer.indexOf("UTC") != 0 || buffer.length() < 4) {
double offset = Double.parseDouble(strOffset);
timeZone = logTimeZoneUtils.warn(fileName + ": invalid timezone specified with TZ keyword - " + buffer + " , " + defaultTimeZone + " wil be used.");
return nullcreateTimeZoneFromDouble(offset);
} catch (NumberFormatException e) {
throw new IOException("Invalid timeZone specified with TZ keyword:" + buffer, e);
}
String strOffset = buffer.substring(3)return timeZone;
TimeZone timeZone;}
private static String getUtcGmtOffset(String trybuffer) {
if ((buffer.startsWith("UTC") || double offset = Double.parseDouble(strOffset);buffer.startsWith("GMT")) && buffer.length() >= 4) {
timeZonereturn = TimeZoneUtilsbuffer.createTimeZoneFromDoublesubstring(offset3);
}
catch (NumberFormatException e) {
if ((buffer.startsWith("Etc/UTC") || buffer.startsWith("Etc/GMT")) && buffer.length() >= 8) {
throw new IOException("Invalid timeZone specified with TZ keyword:" +return buffer, e.substring(7);
}
return timeZonenull;
}
//Parse flags from the line.
//First flag is 'status', the second one is interpolation type.
//Two optional flags are composed to one flag as follows: flag1*1000+flag2
//Flag2 must be between 0 and 999
//Line examples and the composed flags:
//20100227000709 3.0 200 103 -> 200103
//20100227000709 3.0 0 103 -> 103
//20100227000709 3.0 200 0 -> 200000
//20100227000709 3.0 200 -> 200000
private static int getFlag(boolean defaultLayout, int statusColumnIndex, int interpTypeColumnIndex, String[] buffer, String fileLine) {
int statusFlag = Integer.MIN_VALUE;
if (statusColumnIndex != -1) {
String status = buffer[statusColumnIndex];
if (status.isEmpty()) {
if (!defaultLayout) {
//status not specified according to the header, give message
log.error("Status expected, but is ommited in the line: " + fileLine);
}
return Integer.MIN_VALUE; //no status specified in defaultLayout
}
statusFlag = parseIntFlag(status);
if (statusFlag == Integer.MIN_VALUE) {
log.error("Wrong status specified in the line:" + fileLine);
return Integer.MIN_VALUE; //flag cannot be converted to integer, so no flags wil be set (for this timestep)
}
}
if (statusFlag != Integer.MIN_VALUE) statusFlag *= 1000;
int interpTypeFlag = Integer.MIN_VALUE;
if (interpTypeColumnIndex != -1) {
String interpType = buffer[interpTypeColumnIndex];
if (interpType.isEmpty()) {
if (!defaultLayout) {
//Interpolation type not specified according to the header , give message
log.error("Interpolation type expected, but is ommited in the line: " + fileLine);
}
return statusFlag;
}
interpTypeFlag = parseIntFlag(interpType);
if (interpTypeFlag == Integer.MIN_VALUE || interpTypeFlag < 0 || interpTypeFlag > 999) {
log.error("Wrong interpolation type specified, it should be between 0 and 999. Line: " + fileLine);
return Integer.MIN_VALUE;
}
}
if (statusFlag == Integer.MIN_VALUE) {
return interpTypeFlag; //only the interpolation type flag specified
}
if (interpTypeFlag == Integer.MIN_VALUE) {
return statusFlag; //only the status flag type specified
}
return statusFlag + interpTypeFlag;
}
private static int parseIntFlag(String buffer) {
int flag;
try {
flag = TextUtils.parseInt(buffer);
} catch (NumberFormatException e) {
flag = Integer.MIN_VALUE;
}
return flag;
}
}
}
|