package nl.wldelft.fews.system.plugin.dataImport;
import nl.wldelft.util.TextUtils;
import nl.wldelft.util.TimeZoneUtils;
import nl.wldelft.util.io.LineReader;
import nl.wldelft.util.io.TextParser;
import nl.wldelft.util.timeseries.DefaultTimeSeriesHeader;
import nl.wldelft.util.timeseries.TimeSeriesContentHandler;
import java.io.IOException;
import java.util.TimeZone;
import org.apache.log4j.Logger;
/**
 * Text parser for Wiski time series files.
 * <p>
 * A file consists of one or more metadata blocks (records starting with {@code #}), each optionally
 * followed by time series records of the form {@code <yyyyMMddHHmmss> <value>}. Lines starting with
 * {@code ?} are registered as comment lines on the reader, and a record equal to {@code ENDOFFILE}
 * ends parsing.
 * <p>
 * The parser keeps the state of the current parse (reader, content handler, header, time zone) in
 * instance fields, which are re-initialised at the start of every {@link #parse} call.
 */
public class WiskiTimeSeriesParser implements TextParser<TimeSeriesContentHandler> {
    private static final Logger log = Logger.getLogger(WiskiTimeSeriesParser.class);

    /** Read-ahead limit handed to {@code reader.mark(..)}; a record is re-read after {@code reset()}. */
    private static final int MARK_READ_AHEAD_LIMIT = 500;
    /** Pattern of the time stamp at the start of every time series record. */
    private static final String DATE_TIME_PATTERN = "yyyyMMddHHmmss";
    /** Record that terminates parsing of the file. */
    private static final String END_OF_FILE_MARKER = "ENDOFFILE";
    /** First character of every metadata (header) record. */
    private static final char HEADER_PREFIX = '#';
    /** Missing value that is always registered, in addition to any RINVAL found in the header. */
    private static final float DEFAULT_MISSING_VALUE = -777.0f;

    private LineReader reader = null;
    private TimeSeriesContentHandler contentHandler = null;
    private final DefaultTimeSeriesHeader header = new DefaultTimeSeriesHeader();
    private TimeZone headerTimeZone; //timeZone read from the file header
    private String virtualFileName;

    /**
     * Parses the complete file and passes headers, times and values to the content handler.
     *
     * @param reader          line reader positioned at the start of the file
     * @param virtualFileName name of the file, only used in log messages
     * @param contentHandler  receiver of the parsed headers and values
     * @throws Exception if a time series record is found while neither a location id nor a
     *                   parameter id is known, or if a header block is invalid
     */
    @Override
    public void parse(LineReader reader, String virtualFileName, TimeSeriesContentHandler contentHandler) throws Exception {
        this.virtualFileName = virtualFileName;
        this.contentHandler = contentHandler;
        this.contentHandler.addMissingValue(DEFAULT_MISSING_VALUE);
        this.reader = reader;
        this.reader.setCommentLinePrefix('?');
        this.reader.setSkipEmptyLines(true);
        this.header.clear();
        this.headerTimeZone = null;

        // The mark is always placed in front of the record that is about to be read, so that a
        // header record can be pushed back and re-read by parseHeader().
        reader.mark(MARK_READ_AHEAD_LIMIT);
        String[] buffer = new String[2];
        for (String line; (line = reader.readLine()) != null; reader.mark(MARK_READ_AHEAD_LIMIT)) {
            line = line.trim();
            // A whitespace-only record is empty after trimming; skip it instead of failing on charAt(0).
            if (line.isEmpty()) {
                continue;
            }
            if (line.equals(END_OF_FILE_MARKER)) {
                return;
            }
            if (line.charAt(0) == HEADER_PREFIX) {
                reader.reset();
                parseHeader();
                continue;
            }
            if (this.header.getLocationId() == null && this.header.getParameterId() == null) {
                throw new Exception("Not a valid wiski file, REXCHANGE, CNAME, SANR tags are all missing in the file header");
            }
            if (this.contentHandler.isCurrentTimeSeriesHeaderForAllTimesRejected()) {
                continue;
            }

            // Record layout: <time stamp> <value>
            TextUtils.split(line, ' ', buffer);
            if (this.headerTimeZone != null) {
                contentHandler.setTime(this.headerTimeZone, DATE_TIME_PATTERN, buffer[0]);
            } else {
                // No (valid) TZ in the header: fall back to the default time zone of the import.
                contentHandler.setTime(contentHandler.getDefaultTimeZone(), DATE_TIME_PATTERN, buffer[0]);
            }
            contentHandler.setValue('.', buffer[1]);
            contentHandler.applyCurrentFields();
        }
    }

    /**
     * Read metadata from the #-records. Metadata block is followed by the timeseries-records
     * but the timeseries-records may be also omitted. In this case the Metadata block MUST start
     * with a record that begins with ## !
     * Empty records will be ignored.
     * <p/>
     * The meaning of the keys is:
     * TZ : time zone. TZ are UTC0 and UTC+/-x (e.g. UTC+1 or UTC-2).
     * TSPATH : /site id/location id/parameter id/ts shortname
     * example TSPATH/160/160_1/WATHTE/cmd.p
     * only location id, parameter id and ts shortname (as qualifier) are parsed and used
     * SANR : location id. Used only if not specified with TSPATH
     * CNAME: parameter id. Used only if not specified with TSPATH
     * CUNIT: unit
     * RINVAL: missing value
     * REXCHANGE: location-parameter. Will be used only if the metadata block does not contain keys TSPATH, SANR or CNAME.
     * The string specified by keyword REXCHANGE represents location Id and also parameter-id (so locations Id and parameter Id equals)
     * <p/>
     * Reading stops at the first record that does not start with #; that record is pushed back
     * (via reset) so the caller reads it again.
     *
     * @throws IOException if the header format is incorrect
     */
    private void parseHeader() throws IOException {
        this.header.clear();
        this.headerTimeZone = null;
        String tspathPar = null;
        String tspathQual = null;
        String tspathLoc = null;
        String fallbackParLoc = null;

        for (String line; (line = this.reader.readLine()) != null; reader.mark(MARK_READ_AHEAD_LIMIT)) {
            line = line.trim();
            // Empty records are ignored (a whitespace-only record is empty after trimming).
            if (line.isEmpty()) {
                continue;
            }
            if (line.charAt(0) != HEADER_PREFIX) {
                // First record after the metadata block: push it back for the caller.
                reader.reset();
                break;
            }

            String tzString = parseKeyValue("TZ", line);
            if (tzString != null) {
                this.headerTimeZone = parseTimeZone(tzString, this.virtualFileName, this.contentHandler.getDefaultTimeZone().getID());
            }

            //Parse location id and parameter specified with keyword TSPATH
            //format: TSPATH/<site id>/<station id>/<parameter shortname>/<ts shortname>
            //example: TSPATH/160/160_1/WATHTE/cmd.p (contains always all these 4 elements )
            //<ts shortname> is read as qualifier
            String tspath = parseKeyValue("TSPATH", line);
            if (tspath != null) {
                // The value starts with '/', so a valid path is expected to split into 5 parts.
                String[] buffer = TextUtils.split(tspath, '/');
                if (buffer.length != 5 || buffer[2].length() < 1 || buffer[3].length() < 1) {
                    throw new IOException("Not a valid wiski file, TSPATH has a incorrect format: " + tspath +
                            " expected: TSPATH/<site id>/<station id>/<parameter shortname>/<ts shortname>");
                }
                tspathLoc = buffer[2];
                tspathPar = buffer[3];
                tspathQual = buffer[4].replace('.', '_'); // dots are not allowed in fews as internal qualifiers, replace dots with underscores
            }

            String locationId = parseKeyValue("SANR", line);
            if (locationId != null) {
                header.setLocationId(locationId);
            }
            String parameterId = parseKeyValue("CNAME", line);
            if (parameterId != null) {
                header.setParameterId(parameterId);
            }
            String unit = parseKeyValue("CUNIT", line);
            if (unit != null) {
                header.setUnit(unit);
            }
            String missingValue = parseKeyValue("RINVAL", line);
            if (missingValue != null) {
                contentHandler.addMissingValue(missingValue);
            }
            String parLoc = parseKeyValue("REXCHANGE", line);
            if (parLoc != null) {
                fallbackParLoc = parLoc;
            }
        }

        if (tspathPar != null && tspathLoc != null) {
            //If par id, qualifier id and loc are specified with TSPATH, use them, even if the keywords SANR and CNAME are also present in the file
            header.setParameterId(tspathPar);
            header.setQualifierIds(tspathQual);
            header.setLocationId(tspathLoc);
        } else if (header.getParameterId() == null || header.getLocationId() == null) {
            // SANR and/or CNAME missing: REXCHANGE (possibly null) is used for both ids.
            header.setParameterId(fallbackParLoc);
            header.setLocationId(fallbackParLoc);
        }
        contentHandler.setTimeSeriesHeader(header);
    }

    /**
     * Extracts the value that follows {@code key} in a header record.
     * <p>
     * The value runs from directly after the first occurrence of the key up to the first
     * {@code ;*;} separator after it or, when the record contains no such separator after the key,
     * up to the first {@code |*|} separator.
     * NOTE(review): the key is located with a plain substring search, so a key that also occurs
     * inside another key or value on the same record would be matched there - confirm this cannot
     * happen for the supported files.
     *
     * @param key    keyword to look for
     * @param buffer header record
     * @return the value, or null if the key or a terminating separator is not found in the buffer
     */
    private static String parseKeyValue(String key, String buffer) {
        int keyPos = buffer.indexOf(key);
        if (keyPos == -1) {
            return null;
        }
        int valueStart = keyPos + key.length();
        int valueEnd = buffer.indexOf(";*;", valueStart);
        if (valueEnd == -1) {
            valueEnd = buffer.indexOf("|*|", valueStart);
        }
        if (valueEnd == -1) {
            return null;
        }
        return buffer.substring(valueStart, valueEnd);
    }

    /**
     * Parse time zone. Note: UTC always expected, since no other code will occur according to the Wiski 7 format.
     * Allowed formats are: UTC0 and UTC+/-x (e.g. UTC+1 or UTC-2).
     *
     * @param buffer          value of the TZ keyword
     * @param fileName        file name, only used in the warning message
     * @param defaultTimeZone id of the time zone that is reported as the one used instead
     * @return the parsed time zone, or null (after logging a warning) when the value does not start
     *         with UTC or contains no offset
     * @throws IOException if the text after UTC can not be parsed as a number
     */
    private static TimeZone parseTimeZone(String buffer, String fileName, String defaultTimeZone) throws IOException {
        if (!buffer.startsWith("UTC") || buffer.length() < 4) {
            log.warn(fileName + ": invalid timezone specified with TZ keyword - " + buffer + " , " + defaultTimeZone + " wil be used.");
            return null;
        }
        String strOffset = buffer.substring(3);
        try {
            double offset = Double.parseDouble(strOffset);
            return TimeZoneUtils.createTimeZoneFromDouble(offset);
        } catch (NumberFormatException e) {
            throw new IOException("Invalid timeZone specified with TZ keyword:" + buffer, e);
        }
    }
}
|