package nl.wldelft.fews.system.plugin.dataImport;

import nl.wldelft.util.TextUtils;
import nl.wldelft.util.TimeZoneUtils;
import nl.wldelft.util.io.LineReader;
import nl.wldelft.util.io.TextParser;
import nl.wldelft.util.timeseries.DefaultTimeSeriesHeader;
import nl.wldelft.util.timeseries.TimeSeriesContentHandler;

import java.io.IOException;
import java.util.TimeZone;

import org.apache.log4j.Logger;

/**
 * Parser for Wiski time series text files.
 * <p/>
 * A file consists of one or more metadata blocks (records starting with '#'), each optionally
 * followed by time series records of the form {@code yyyyMMddHHmmss value}. Parsing stops at end of
 * input or at a record that equals {@code ENDOFFILE}. Lines starting with '?' are configured as
 * comment lines on the reader.
 */
public class WiskiTimeSeriesParser implements TextParser<TimeSeriesContentHandler> {
    private static final Logger log = Logger.getLogger(WiskiTimeSeriesParser.class);

    // Markers that terminate a header value; they therefore also separate consecutive header fields.
    private static final String[] VALUE_TERMINATORS = {";*;", "|*|"};

    private LineReader reader = null;
    private TimeSeriesContentHandler contentHandler = null;
    private final DefaultTimeSeriesHeader header = new DefaultTimeSeriesHeader();
    private TimeZone headerTimeZone; //timeZone read from the file header
    private String virtualFileName;

    /**
     * Reads the complete file and forwards every header and every time/value pair to the content handler.
     *
     * @param reader          source of the records; its comment prefix and empty-line handling are configured here
     * @param virtualFileName name of the file, used in log messages only
     * @param contentHandler  receiver of headers, times and values
     * @throws Exception if a data record is found while neither a location id nor a parameter id is known,
     *                   or if reading or header parsing fails
     */
    @Override
    public void parse(LineReader reader, String virtualFileName, TimeSeriesContentHandler contentHandler) throws Exception {
        this.virtualFileName = virtualFileName;
        this.contentHandler = contentHandler;
        this.contentHandler.addMissingValue(-777.0f);
        this.reader = reader;
        this.reader.setCommentLinePrefix('?');
        this.reader.setSkipEmptyLines(true);
        this.header.clear();
        this.headerTimeZone = null;

        String[] buffer = new String[2];
        String line = readTrimmedLine();
        while (line != null) {
            // A whitespace-only record is empty after trimming; skip it instead of failing on charAt(0).
            if (line.isEmpty()) {
                line = readTrimmedLine();
                continue;
            }
            if (line.equals("ENDOFFILE")) return;

            if (line.charAt(0) == '#') {
                // parseHeader consumes the whole metadata block and hands back the first record
                // that does not belong to it, so no mark/reset push-back is needed.
                line = parseHeader(line);
                continue;
            }

            if (this.header.getLocationId() == null && this.header.getParameterId() == null)
                throw new Exception("Not a valid wiski file, REXCHANGE, CNAME, SANR tags are all missing in the file header");

            if (!this.contentHandler.isCurrentTimeSeriesHeaderForAllTimesRejected()) {
                // NOTE(review): the buffer is reused between records; what TextUtils.split stores when a
                // record holds fewer than two fields is not visible here - confirm.
                TextUtils.split(line, ' ', buffer);
                TimeZone timeZone = this.headerTimeZone != null ? this.headerTimeZone : contentHandler.getDefaultTimeZone();
                contentHandler.setTime(timeZone, "yyyyMMddHHmmss", buffer[0]);
                contentHandler.setValue('.', buffer[1]);
                contentHandler.applyCurrentFields();
            }
            line = readTrimmedLine();
        }
    }

    /**
     * Reads the next record from the reader.
     *
     * @return the trimmed record, or null at end of input
     * @throws IOException if reading fails
     */
    private String readTrimmedLine() throws IOException {
        String line = this.reader.readLine();
        return line == null ? null : line.trim();
    }

    /**
     * Read metadata from the #-records. Metadata block is followed by the timeseries-records
     * but the timeseries-records may be also omitted. In this case the Metadata block MUST start
     * with a record that begins with ## !
     * Empty records will be ignored.
     * <p/>
     * The meaning of the keys is:
     * TZ : time zone. TZ are UTC0 and UTC+/-x (e.g. UTC+1 or UTC-2).
     * TSPATH : /site id/location id/parameter id/ts shortname
     * example TSPATH/160/160_1/WATHTE/cmd.p
     * only location id, parameter id and ts shortname (as qualifier) are parsed and used
     * SANR : location id. Used only if not specified with TSPATH
     * CNAME: parameter id. Used only if not specified with TSPATH
     * CUNIT: unit
     * RINVAL: missing value
     * REXCHANGE: location-parameter. Will be used only if the metadata block does not contain keys TSPATH, SANR or CNAME.
     * The string specified by keyword REXCHANGE represents location Id and also parameter-id (so location Id and parameter Id are equal)
     *
     * @param firstLine first (trimmed) record of the metadata block, already read by the caller
     * @return the first trimmed record after the metadata block, or null when the end of input was reached
     * @throws IOException if the header format is incorrect
     */
    private String parseHeader(String firstLine) throws IOException {
        this.header.clear();
        this.headerTimeZone = null;
        String tspathPar = null;
        String tspathQual = null;
        String tspathLoc = null;
        String fallbackParLoc = null;

        String line = firstLine;
        for (; line != null; line = readTrimmedLine()) {
            if (line.isEmpty()) continue; // empty records inside the metadata block are ignored
            if (line.charAt(0) != '#') break; // first record after the metadata block, returned to the caller

            String tzString = parseKeyValue("TZ", line);
            if (tzString != null) {
                this.headerTimeZone = parseTimeZone(tzString, this.virtualFileName, this.contentHandler.getDefaultTimeZone().getID());
            }

            //Parse location id and parameter specified with keyword TSPATH
            //format: TSPATH/<site id>/<station id>/<parameter shortname>/<ts shortname>
            //example: TSPATH/160/160_1/WATHTE/cmd.p (contains always all these 4 elements )
            //<ts shortname> is read as qualifier
            String tspath = parseKeyValue("TSPATH", line);
            if (tspath != null) {
                String[] buffer = TextUtils.split(tspath, '/');
                if (buffer.length != 5 || buffer[2].length() < 1 || buffer[3].length() < 1) {
                    throw new IOException("Not a valid wiski file, TSPATH has a incorrect format: " + tspath + " expected: TSPATH/<site id>/<station id>/<parameter shortname>/<ts shortname>");
                }
                tspathLoc = buffer[2];
                tspathPar = buffer[3];
                tspathQual = buffer[4].replace('.', '_'); // dots are not allowed in fews as internal qualifiers, replace dots with underscores
            }

            String locationId = parseKeyValue("SANR", line);
            if (locationId != null) header.setLocationId(locationId);

            String parameterId = parseKeyValue("CNAME", line);
            if (parameterId != null) header.setParameterId(parameterId);

            String unit = parseKeyValue("CUNIT", line);
            if (unit != null) header.setUnit(unit);

            String missingValue = parseKeyValue("RINVAL", line);
            if (missingValue != null) contentHandler.addMissingValue(missingValue);

            String parLoc = parseKeyValue("REXCHANGE", line);
            if (parLoc != null) fallbackParLoc = parLoc;
        }

        if (tspathPar != null && tspathLoc != null) {
            //If par id, qualifier id and loc are specified with TSPATH, use them, even if the keywords SANR and CNAME are also present in the file
            header.setParameterId(tspathPar);
            header.setQualifierIds(tspathQual);
            header.setLocationId(tspathLoc);
        } else if (header.getParameterId() == null || header.getLocationId() == null) {
            // SANR/CNAME incomplete: REXCHANGE (possibly null) is used for both ids
            header.setParameterId(fallbackParLoc);
            header.setLocationId(fallbackParLoc);
        }
        contentHandler.setTimeSeriesHeader(header);
        return line;
    }

    /**
     * Extracts the value that directly follows a key in a header record.
     * <p/>
     * A key is only recognised at the start of a field, so that key text occurring inside the value of
     * another field is not mistaken for the key. The value runs up to the nearest terminator.
     *
     * @param key    keyword to look for, e.g. TZ or SANR
     * @param buffer header record
     * @return the value, or null if the key is not found in the buffer or its value is not terminated
     */
    private static String parseKeyValue(String key, String buffer) {
        for (int keyPos = buffer.indexOf(key); keyPos != -1; keyPos = buffer.indexOf(key, keyPos + 1)) {
            if (!isAtFieldStart(buffer, keyPos)) continue;
            int valueStart = keyPos + key.length();
            int valueEnd = indexOfNearestTerminator(buffer, valueStart);
            if (valueEnd == -1) return null;
            return buffer.substring(valueStart, valueEnd);
        }
        return null;
    }

    //Returns true if pos (ignoring whitespace directly before it) is at the start of the record or is preceded by '#' or by a value terminator
    private static boolean isAtFieldStart(String buffer, int pos) {
        int start = pos;
        while (start > 0 && Character.isWhitespace(buffer.charAt(start - 1))) start--;
        if (start == 0 || buffer.charAt(start - 1) == '#') return true;
        for (String terminator : VALUE_TERMINATORS) {
            // startsWith returns false for a negative offset, so short prefixes are handled
            if (buffer.startsWith(terminator, start - terminator.length())) return true;
        }
        return false;
    }

    //Returns the position of the first value terminator at or after fromIndex, or -1 if there is none
    private static int indexOfNearestTerminator(String buffer, int fromIndex) {
        int nearest = -1;
        for (String terminator : VALUE_TERMINATORS) {
            int pos = buffer.indexOf(terminator, fromIndex);
            if (pos != -1 && (nearest == -1 || pos < nearest)) nearest = pos;
        }
        return nearest;
    }

    /**
     * Parse time zone. Note: UTC always expected, since no other code will occur according to the Wiski 7 format.
     * Allowed formats are: UTC0 and UTC+/-x (e.g. UTC+1 or UTC-2).
     *
     * @param buffer          value of the TZ keyword
     * @param fileName        file name used in the warning message
     * @param defaultTimeZone id of the time zone reported in the warning as the one that will be used instead
     * @return the parsed time zone, or null (after logging a warning) if the value does not start with UTC
     *         or has no offset
     * @throws IOException if the offset after UTC is not a number
     */
    private static TimeZone parseTimeZone(String buffer, String fileName, String defaultTimeZone) throws IOException {
        if (!buffer.startsWith("UTC") || buffer.length() < 4) {
            log.warn(fileName + ": invalid timezone specified with TZ keyword - " + buffer + " , " + defaultTimeZone + " wil be used.");
            return null;
        }
        String strOffset = buffer.substring(3);
        try {
            double offset = Double.parseDouble(strOffset);
            return TimeZoneUtils.createTimeZoneFromDouble(offset);
        } catch (NumberFormatException e) {
            throw new IOException("Invalid timeZone specified with TZ keyword:" + buffer, e);
        }
    }
}