package nl.wldelft.fews.system.plugin.dataImport;
import nl.wldelft.util.TextUtils;
import nl.wldelft.util.TimeZoneUtils;
import nl.wldelft.util.io.LineReader;
import nl.wldelft.util.io.TextParser;
import nl.wldelft.util.timeseries.DefaultTimeSeriesHeader;
import nl.wldelft.util.timeseries.TimeSeriesContentHandler;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.TimeZone;
import org.apache.log4j.Logger;
public class WiskiTimeSeriesParser implements TextParser<TimeSeriesContentHandler> {
private static final Logger log = Logger.getLogger(WiskiTimeSeriesParser.class);
private LineReader reader = null;
private TimeSeriesContentHandler contentHandler = null;
private DefaultTimeSeriesHeader header = new DefaultTimeSeriesHeader();
private TimeZone headerTimeZone; //timeZone read from the file header
private String virtualFileName;
@Override
public void parse(LineReader reader, String virtualFileName, TimeSeriesContentHandler contentHandler) throws Exception {
this.virtualFileName = virtualFileName;
this.contentHandler = contentHandler;
this.contentHandler.addMissingValue(-777.0f);
this.reader = reader;
this.reader.setCommentLinePrefix('?');
this.reader.setSkipEmptyLines(true);
this.header.clear();
this.headerTimeZone = null;
reader.mark(500);
String[] buffer = new String[2];
for (String line; (line = reader.readLine()) != null; reader.mark(500)) {
line = line.trim();
if (line.equals("ENDOFFILE")) return;
if (line.charAt(0) == '#') {
reader.reset();
/**
* This parser supports two formats: WISKI7 and the previous format of WISKI (number unknown)
* Recognizing the formats:
* a) If the header contains the keyword TSPATH, then it is WISKI7.
* The event rows wil be parsed according to the format as specified with the keyword LAYOUT. If no LAYOUT specified, the defaults wil be used.
* b) otherwise it is older Wiski format.
* The event rows wil be parsed as follows: the first column is timestamp, the second column is value
*/
public class WiskiTimeSeriesParser implements TextParser<TimeSeriesContentHandler> {
private static final Logger log = Logger.getLogger(WiskiTimeSeriesParser.class);
private static final char remarkSepar = '"';
private LineReader reader = null;
private TimeSeriesContentHandler contentHandler = null;
private DefaultTimeSeriesHeader header = new DefaultTimeSeriesHeader();
private TimeZone headerTimeZone; //timeZone read from the file header
private String virtualFileName;
//Fields <name>Index specify the column of the timestamp, value, status ,.... in the event row.
//With the keyword LAYOUT an another sequence of the data can be specified.
private int timestampIndex;
private int valueIndex;
private int statusIndex;
private int interpolation_typeIndex;
private int remarkIndex;
private boolean defaultWiski7LayoutUsed = true;
@Override
public void parse(LineReader reader, String virtualFileName, TimeSeriesContentHandler contentHandler) throws Exception {
this.virtualFileName parseHeader()= virtualFileName;
this.contentHandler continue= contentHandler;
}
this.contentHandler.addMissingValue(-777.0f);
this.reader = reader;
if (this.headerreader.getLocationIdsetCommentLinePrefix('?') == null &&;
this.headerreader.getParameterIdsetSkipEmptyLines(true);
== null)
this.header.clear();
this.headerTimeZone = thrownull;
new Exception("Not a valid wiski file, REXCHANGE, CNAME, SANR tags are all missing in //Initialized the columns to read data from the file header");
event rows.
initializeWiski7RowLayout();
if (this.contentHandler.isCurrentTimeSeriesHeaderForAllTimesRejected()) continuereader.mark(500);
String[] buffer = TextUtils.split(line, ' ', buffer)new String[5];
for (String line; (line if= (this.headerTimeZonereader.readLine()) != null; reader.mark(500)) {
line = contentHandlerline.setTime(this.headerTimeZone, "yyyyMMddHHmmss", buffer[0]trim();
} else {if (line.equals("ENDOFFILE")) return;
contentHandler.setTime(contentHandler.getDefaultTimeZone(), "yyyyMMddHHmmss", buffer[0]);if (line.charAt(0) == '#') {
}
contentHandler.setValue('.', buffer[1]reader.reset();
contentHandler.applyCurrentFields parseHeader();
}
}
/**continue;
* Read metadata from the #-records. Metadata block}
is followed by the timeseries-records
* but the timeseries-records may be also omitted. In this case the Metadata block MUST start
if (this.header.getLocationId() == null && this.header.getParameterId() == null)
* with a record thatthrow begins with ## !
* Empty records wil be ignored.
* <p/>
new Exception("Not a valid wiski file, REXCHANGE, CNAME, SANR tags are all missing in the file header");
* The meaning of the keys is:if (this.contentHandler.isCurrentTimeSeriesHeaderForAllTimesRejected()) continue;
* TZ : time zone. TZ are UTC0 and UTC+//-x (e.g. UTC+1 or UTC-2).
* TSPATH : /site id/location id/parameter id/ts shortnameIf default layout used, the remarks (if any) must be specified last in the line and start with "-token
* example TSPATH/160/160_1/WATHTE/cmd.p
String remarksDefaultLayout;
* only location id and parameter id is parsed and used
if (this.defaultWiski7LayoutUsed && * SANR : location id. Used only if not specified with TSPATH
line.indexOf(remarkSepar) != -1) {
* CNAME: parameter id. Used only if not specified with TSPATH
* CUNIT: unit//extract remarks first (if any), so that they cannot be confused with the status or interp.type
* RINVAL: missing value
* REXCHANGE: location-parameter. WilString beleftStr used only if the metadata block does not contain keys TSPATH, SANR or CNAME.
= TextUtils.leftFrom(line, remarkSepar);
*remarksDefaultLayout The string specified by keyword REXCHANGE represents location Id and also parameter-id (so locations Id and parameter Id equals)
*= TextUtils.rightFrom(line, remarkSepar).replace(remarkSepar, ' ');
TextUtils.split(leftStr, ' ', buffer);
* @throws IOException if the header format} iselse incorrect{
*/
private void parseHeader() throws IOException {
remarksDefaultLayout this.header.clear()= null;
this.headerTimeZone = null;
TextUtils.split(line, ' String tspathPar = null', buffer);
String tspathQual = null;}
String tspathLoc = null;
if (this.headerTimeZone != null) {
String fallbackParLoc = null;
for (String line; (line = this.reader.readLine()) != null; reader.mark(500)) {
contentHandler.setTime(this.headerTimeZone, "yyyyMMddHHmmss", buffer[this.timestampIndex]);
} else line{
= line.trim();
if contentHandler.setTime(linecontentHandler.charAtgetDefaultTimeZone(0) != '#') {, "yyyyMMddHHmmss", buffer[this.timestampIndex]);
}
reader.reset();
int flag = getFlag(this.defaultWiski7LayoutUsed, this.statusIndex, this.interpolation_typeIndex, buffer, breakline);
}
if
(flag != Integer.MIN_VALUE) {
String tzString = parseKeyValue("TZ", linecontentHandler.setFlag(flag);
if (tzString != null) {}
this.headerTimeZone = parseTimeZone(tzString, this.virtualFileName, this.contentHandler.getDefaultTimeZone().getID());contentHandler.setValue('.', buffer[this.valueIndex]);
}
contentHandler.setComment(null); //reset
//Parse location id and parameter specified with keyword TSPATH
if (remarksDefaultLayout != null) {
//format: TSPATH/<site id>/<station id>/<parameter shortname>/<ts shortname>
if (!remarksDefaultLayout.isEmpty()) contentHandler.setComment(remarksDefaultLayout);
} else {
//example: TSPATH/160/160_1/WATHTE/cmd.p (contains always all these 4 elements )
if (this.remarkIndex != -1) {
//<ts shortname> is read as qualifier
String tspathremark = parseKeyValue("TSPATH", linebuffer[this.remarkIndex].replace(remarkSepar, ' ');
if (tspath != null) {!remark.isEmpty()) contentHandler.setComment(remark);
String[]}
buffer = TextUtils.split(tspath, '/');
}
if (buffer.length != 5 || buffer[2].length() < 1 || buffer[3].length() < 1) { contentHandler.applyCurrentFields();
}
}
/**
* Read metadata from the #-records. Metadata throwblock is new IOException("Not a valid wiski file, TSPATH has a incorrect format: " + tspath +
" expected: TSPATH/<site id>/<station id>/<parameter shortname>/<ts shortname>");
}
tspathLoc = buffer[2];followed by the timeseries-records
* but the timeseries-records may be also omitted. In this case the Metadata block MUST start
* with a record that begins with ## !
* Empty records wil be ignored.
* <p/>
* The meaning of the keys is:
* TZ : time zone. TZ are UTC0 and UTC+/-x (e.g. UTC+1 or UTC-2).
* TSPATH : /site id/location id/parameter id/ts shortname
* example TSPATH/160/160_1/WATHTE/cmd.p
* only location id and parameter id is parsed and used
* SANR : location id. Used only if not specified with TSPATH
* CNAME: parameter id. Used only if not specified with TSPATH
* CUNIT: unit
* RINVAL: missing value
* REXCHANGE: location-parameter. Wil be used only if the metadata block does not contain keys TSPATH, SANR or CNAME.
* The string specified by keyword REXCHANGE represents location Id and also parameter-id (so locations Id and parameter Id equals)
*
* @throws IOException if the header format is incorrect
*/
private void parseHeader() throws IOException {
this.header.clear();
this.headerTimeZone = null;
//Initialized the columns to read data from the event rows.
initializeWiski7RowLayout();
String tspathPar = null;
String tspathQual = null;
String tspathLoc = null;
String fallbackParLoc = null;
for (String line; (line = this.reader.readLine()) != null; reader.mark(500)) {
line = line.trim();
if (line.charAt(0) != '#') {
reader.reset();
break;
}
String layoutString = parseKeyValue("LAYOUT", line);
if (layoutString != null) {
defaultWiski7LayoutUsed = false;
//Obtain information how to parse event rows
parseEventRowLayout(layoutString.trim());
}
String tzString = parseKeyValue("TZ", line);
if (tzString != null) {
this.headerTimeZone = parseTimeZone(tzString, this.virtualFileName, this.contentHandler.getDefaultTimeZone().getID());
}
//Parse location id and parameter specified with keyword TSPATH
//format: TSPATH/<site id>/<station id>/<parameter shortname>/<ts shortname>
//example: TSPATH/160/160_1/WATHTE/cmd.p (contains always all these 4 elements )
//<ts shortname> is read as qualifier
String tspath = parseKeyValue("TSPATH", line);
if (tspath != null) {
String[] buffer = TextUtils.split(tspath, '/');
if (buffer.length != 5 || buffer[2].length() < 1 || buffer[3].length() < 1) {
throw new IOException("Not a valid wiski file, TSPATH has a incorrect format: " + tspath +
" expected: TSPATH/<site id>/<station id>/<parameter shortname>/<ts shortname>");
}
tspathLoc = buffer[2];
tspathPar = buffer[3];
tspathQual = buffer[4].replace('.', '_'); // dots are not allowed in fews as internal qualifiers, replace dots with underscores
}
String locationId = parseKeyValue("SANR", line);
if (locationId != null) header.setLocationId(locationId);
String parameterId = parseKeyValue("CNAME", line);
if (parameterId != null) header.setParameterId(parameterId);
String unit = parseKeyValue("CUNIT", line);
if (unit != null) header.setUnit(unit);
String missingValue = parseKeyValue("RINVAL", line);
if (missingValue != null) contentHandler.addMissingValue(missingValue);
String parLoc = parseKeyValue("REXCHANGE", line);
if (parLoc != null) fallbackParLoc = parLoc;
}
if (tspathPar != null && tspathLoc != null) {
//If par id, qualifier id and loc are specified with TSPATH, use them , even if the keywords SANR and SNAME are also present in the file
header.setParameterId(tspathPar);
header.setQualifierIds(tspathQual);
header.setLocationId(tspathLoc);
} else {
//The header has an OLD format (no WISKI7), if any LAYOUT specifid, it wil be ignored !
//Re-initialized the columns to read data from the event rows (no flags reading !).
initializeRowLayout();
if (header.getParameterId() == null || header.getLocationId() == null) {
header.setParameterId(fallbackParLoc);
header.setLocationId(fallbackParLoc);
}
}
contentHandler.setTimeSeriesHeader(header);
}
private void initializeWiski7RowLayout() {
defaultWiski7LayoutUsed = true;
timestampIndex = 0; // timestamp wil be read from column 1 by default
valueIndex = 1; // value wil be read from column 2 by default
statusIndex = 2; // status wil be read from column 3 by default
interpolation_typeIndex = 3; // interpolation_type wil be read from column 4 by default
remarkIndex = 4; // remark wil be read from column 5 by default
}
private void initializeRowLayout() {
defaultWiski7LayoutUsed = false;
timestampIndex = 0; // timestamp is always read from column 1
valueIndex = 1; // value is always read from column 2
statusIndex = -1; // status NOT read
interpolation_typeIndex = -1; // interpolation_type NOT read
remarkIndex = -1; // remark NOT read
}
//Some examples of the buffer, that is parsed from the header line LAYOUT:
//(timestamp,value)
//(timestamp,value,status,interpolationm_type,remark)
private void parseEventRowLayout(String bufferIn) throws IOException {
//Check the passed buffer, remove brackets from buffer
String buffer = null;
if (bufferIn.length() > 5) {
if (bufferIn.charAt(0) == '(' && bufferIn.charAt(bufferIn.length() - 1) == ')') {
buffer = bufferIn.substring(1, bufferIn.length() - 1).trim();
}
}
if (buffer == null)
throw new IOException("String specified with the LAYOUT-keyword has wrong format:" + bufferIn);
String[] keywords = new String[5]; //max. 5 keywords expected , as specified in the second example
TextUtils.split(buffer, ',', keywords);
//-1 = data not specified in the file (accorning to the header keyword LAYOUT)
timestampIndex = -1;
valueIndex = -1;
statusIndex = -1;
interpolation_typeIndex = -1;
remarkIndex = -1;
for (int i = 0; i < 5; i++) {
String keyword = keywords[i].trim();
if (keyword.isEmpty())
continue; //not all keywords must be specified, however at least 2: timestamp and value
if (keyword.equals("timestamp")) {
if (timestampIndex != -1)
throw new IOException("Keyword '" + keyword + "' specified more than once in the header line LAYOUT");
timestampIndex = i;
} else if (keyword.equals("value")) {
if (valueIndex != -1)
throw new IOException("Keyword '" + keyword + "' specified more than once in the header line LAYOUT");
valueIndex = i;
} else if (keyword.equals("status")) {
if (statusIndex != -1)
throw new IOException("Keyword '" + keyword + "' specified more than once in the header line LAYOUT");
statusIndex = i;
} else if (keyword.equals("interpolation_type")) {
if (interpolation_typeIndex != -1)
throw new IOException("Keyword '" + keyword + "' specified more than once in the header line LAYOUT");
interpolation_typeIndex = i;
} else if (keyword.equals("remark")) {
if (remarkIndex != -1)
throw new IOException("Keyword '" + keyword + "' specified more than once in the header line LAYOUT");
remarkIndex = i;
} else {
throw new IOException("Wrong keyword '" + keyword + "' specified in the header line #LAYOUT ");
}
}
if (timestampIndex == -1 || valueIndex == -1) {
throw new IOException("Keywords timestamp and/or value are not specified in the header line LAYOUT !");
}
}
//Returns value or null if the key not found in the buffer
private static String parseKeyValue(String key, String buffer) {
int keyPos = buffer.indexOf(key);
if (keyPos == -1) return null;
int endValuePos = buffer.indexOf(";*;", keyPos + key.length());
if (endValuePos == -1) endValuePos = buffer.indexOf("|*|", keyPos + key.length());
if (endValuePos == -1) return null;
return buffer.substring(keyPos + key.length(), endValuePos);
}
//Parse time zone. Note: UTC always expected , since no other code wil occur according to the Wiski 7 format
//Allowed formats are: UTC0 and UTC+/-x (e.g. UTC+1 or UTC-2).
private static TimeZone parseTimeZone(String buffer, String fileName, String defaultTimeZone) throws IOException {
if (buffer.equals("MEZ")) return TimeZone.getTimeZone("GMT+1");
if (buffer.equals("MESZ")) return TimeZone.getTimeZone("CET");
if (buffer.indexOf("UTC") != 0 || buffer.length() < 4) {
log.warn(fileName + ": invalid timezone specified with TZ keyword - " + buffer + " , " + defaultTimeZone + " wil be used.");
return null;
}
String strOffset = buffer.substring(3);
TimeZone timeZone;
try {
double offset = Double.parseDouble(strOffset);
timeZone = TimeZoneUtils.createTimeZoneFromDouble(offset);
} catch (NumberFormatException e) {
throw new IOException("Invalid timeZone specified with TZ keyword:" + buffer, e);
}
return timeZone;
}
//Parse flags from the line.
//First flag is 'status', the second one is interpolation type.
//Two optional flags are composed to one flag as follows: flag1*1000+flag2
//Flag2 must be between 0 and 999
//Line examples and the composed flags:
//20100227000709 3.0 200 103 -> 200103
//20100227000709 3.0 0 103 -> 103
//20100227000709 3.0 200 0 -> 200000
//20100227000709 3.0 200 -> 200000
private static int getFlag(boolean defaultLayout, int statusColumnIndex, int interpTypeColumnIndex, String[] buffer, String fileLine) {
int statusFlag = Integer.MIN_VALUE;
if (statusColumnIndex != -1) {
String status tspathPar = buffer[3statusColumnIndex];
if (status.isEmpty()) {
tspathQual = buffer[4].replace('.', '_'); // dots are not allowed in fews as internal qualifiers, replace dots with underscoresif (!defaultLayout) {
}
//status not specified according Stringto locationIdthe = parseKeyValue("SANR", line);header, give message
if (locationId != null) header.setLocationId(locationId);
log.error("Status expected, but is ommited in String parameterId = parseKeyValue("CNAME", linethe line: " + fileLine);
if (parameterId != null) header.setParameterId(parameterId); }
String unit = parseKeyValue("CUNIT", line);
return Integer.MIN_VALUE; //no status specified in defaultLayout
if (unit != null) header.setUnit(unit); }
String missingValue = parseKeyValue("RINVAL", linestatusFlag = parseIntFlag(status);
if (missingValuestatusFlag !== nullInteger.MIN_VALUE) contentHandler.addMissingValue(missingValue);
{
String parLoc = parseKeyValuelog.error("REXCHANGE", lineWrong status specified in the line:" + fileLine);
if (parLoc != null) fallbackParLoc = parLoc;
}
if (tspathPar != null && tspathLoc != null) {
return Integer.MIN_VALUE; //flag cannot be converted to integer, so no flags wil be set (for this timestep)
}
//If par id,}
qualifier id and loc are specified with if TSPATH, use them , even if the keywords SANR and SNAME are also present in the file(statusFlag != Integer.MIN_VALUE) statusFlag *= 1000;
int interpTypeFlag = Integer.MIN_VALUE;
if (interpTypeColumnIndex != header.setParameterId(tspathPar);
-1) {
header.setQualifierIds(tspathQual);
String interpType = buffer[interpTypeColumnIndex];
if header(interpType.setLocationIdisEmpty(tspathLoc)); {
} else if (header.getParameterId() == null || header.getLocationId() == null if (!defaultLayout) {
header.setParameterId(fallbackParLoc);
//Interpolation type not specified according to the header.setLocationId(fallbackParLoc);
, give message
}
contentHandlerlog.setTimeSeriesHeader(header);
}
//Returns value or null if the key not found in the buffer
private static String parseKeyValue(String key, String buffer) {
error("Interpolation type expected, but is ommited in the line: " + fileLine);
}
int keyPos = buffer.indexOf(key);return statusFlag;
if (keyPos == -1) return null;}
int endValuePos = buffer.indexOf(";*;", keyPos + key.length());
interpTypeFlag = parseIntFlag(interpType);
if (endValuePosinterpTypeFlag == -1) endValuePos = buffer.indexOf("|*|", keyPos + key.length());
if (endValuePos == -1) return null;Integer.MIN_VALUE || interpTypeFlag < 0 || interpTypeFlag > 999) {
return buffer.substring(keyPos + key.length(), endValuePos);
}
//Parse time zone. Note: UTC always expected , since no other code wil occur according to the Wiski 7 format
//Allowed formats are: UTC0 and UTC+/-x (e.g. UTC+1 or UTC-2).
private static TimeZone parseTimeZone(String buffer, String fileName, String defaultTimeZone) throws IOException {
log.error("Wrong interpolation type specified, it should be between 0 and 999. Line: " + fileLine);
return Integer.MIN_VALUE;
}
}
if (buffer.indexOf("UTC") != 0 || buffer.length() < 4) {
statusFlag == Integer.MIN_VALUE) {
return interpTypeFlag; log.warn(fileName + ": invalid timezone specified with TZ keyword - " + buffer + " , " + defaultTimeZone + " wil be used.");//only the interpolation type flag specified
}
if (interpTypeFlag == Integer.MIN_VALUE) {
return null;statusFlag; //only the status flag type specified
}
Stringreturn strOffsetstatusFlag = buffer.substring(3);
+ interpTypeFlag;
TimeZone timeZone;}
private static int parseIntFlag(String trybuffer) {
int flag;
double offset = Double.parseDouble(strOffset);try {
timeZoneflag = TimeZoneUtilsTextUtils.createTimeZoneFromDoubleparseInt(offsetbuffer);
} catch (NumberFormatException e) {
throwflag new IOException("Invalid timeZone specified with TZ keyword:" + buffer, e)= Integer.MIN_VALUE;
}
return timeZoneflag;
}
}
|