You are viewing an old version of this page. View the current version.

Compare with Current View Page History

« Previous Version 2 Next »

package nl.wldelft.fews.system.plugin.dataImport;


import nl.wldelft.util.TextUtils;
import nl.wldelft.util.TimeZoneUtils;
import nl.wldelft.util.io.LineReader;
import nl.wldelft.util.io.TextParser;
import nl.wldelft.util.timeseries.DefaultTimeSeriesHeader;
import nl.wldelft.util.timeseries.TimeSeriesContentHandler;

import java.io.IOException;
import java.util.TimeZone;

import org.apache.log4j.Logger;

/**
 * Text parser for WISKI time series files.
 * <p>
 * A file consists of one or more blocks. Each block starts with metadata records (lines that begin
 * with '#') optionally followed by time series records of the form {@code yyyyMMddHHmmss value}.
 * Lines starting with '?' are treated as comments, and a line equal to {@code ENDOFFILE} ends parsing.
 * <p>
 * The parser keeps per-parse state in instance fields, so a single instance must not be used by
 * several threads at the same time.
 */
public class WiskiTimeSeriesParser implements TextParser<TimeSeriesContentHandler> {

    private static final Logger log = Logger.getLogger(WiskiTimeSeriesParser.class);

    // Read-ahead limit passed to LineReader.mark() so that a line can be "un-read" with reset().
    // NOTE(review): a record longer than this limit might invalidate the mark, depending on how
    // LineReader implements mark/reset - confirm against the LineReader implementation.
    private static final int READ_AHEAD_LIMIT = 500;

    // Field delimiters that terminate a key/value pair in a metadata record
    private static final String DELIMITER_SEMICOLON = ";*;";
    private static final String DELIMITER_PIPE = "|*|";

    private LineReader reader = null;
    private TimeSeriesContentHandler contentHandler = null;
    private final DefaultTimeSeriesHeader header = new DefaultTimeSeriesHeader();
    private TimeZone headerTimeZone; //timeZone read from the file header
    private String virtualFileName;

    /**
     * Parses the complete file: every metadata block is turned into a time series header and every
     * time series record that follows it is passed to the content handler.
     *
     * @param reader          line reader positioned at the start of the file
     * @param virtualFileName name of the file, used in log messages only
     * @param contentHandler  receiver of the parsed headers, times and values
     * @throws Exception if the file contains data records without a usable header or if the header is malformed
     */
    @Override
    public void parse(LineReader reader, String virtualFileName, TimeSeriesContentHandler contentHandler) throws Exception {
        this.virtualFileName = virtualFileName;
        this.contentHandler = contentHandler;
        // -777 is always registered as missing value, in addition to any RINVAL found in a header
        this.contentHandler.addMissingValue(-777.0f);

        this.reader = reader;
        this.reader.setCommentLinePrefix('?');
        this.reader.setSkipEmptyLines(true);

        this.header.clear();
        this.headerTimeZone = null;

        reader.mark(READ_AHEAD_LIMIT);
        String[] buffer = new String[2];
        for (String line; (line = reader.readLine()) != null; reader.mark(READ_AHEAD_LIMIT)) {
            line = line.trim();
            // A whitespace-only record becomes empty after trim(); ignore it instead of failing on charAt(0)
            if (line.isEmpty()) continue;
            if (line.equals("ENDOFFILE")) return;

            if (line.charAt(0) == '#') {
                // Start of a metadata block: push the line back so parseHeader() sees the whole block
                reader.reset();
                parseHeader();
                continue;
            }

            if (this.header.getLocationId() == null && this.header.getParameterId() == null)
                throw new Exception("Not a valid wiski file, REXCHANGE, CNAME, SANR tags are all missing in the file header");

            // Nothing to do for this record when the handler rejects the current header for all times
            if (this.contentHandler.isCurrentTimeSeriesHeaderForAllTimesRejected()) continue;

            // Record layout: <yyyyMMddHHmmss> <value>
            // NOTE(review): buffer is reused between records; presumably TextUtils.split overwrites
            // both elements for every record - confirm for records that contain a single token.
            TextUtils.split(line, ' ', buffer);
            if (this.headerTimeZone != null) {
                contentHandler.setTime(this.headerTimeZone, "yyyyMMddHHmmss", buffer[0]);
            } else {
                // No (valid) TZ key in the header: fall back to the default time zone of the handler
                contentHandler.setTime(contentHandler.getDefaultTimeZone(), "yyyyMMddHHmmss", buffer[0]);
            }
            contentHandler.setValue('.', buffer[1]);
            contentHandler.applyCurrentFields();
        }
    }

    /**
     * Read metadata from the #-records. Metadata block is followed by the timeseries-records
     * but the timeseries-records may be also omitted. In this case the Metadata block MUST start
     * with a record that begins with ## !
     * Empty records will be ignored.
     * <p>
     * The meaning of the keys is:
     * TZ : time zone. TZ are UTC0 and UTC+/-x (e.g. UTC+1 or UTC-2).
     * TSPATH :  /site id/location id/parameter id/ts shortname
     * example   TSPATH/160/160_1/WATHTE/cmd.p
     * location id, parameter id and ts shortname (as qualifier) are parsed and used
     * SANR : location id. Used only if not specified with  TSPATH
     * CNAME: parameter id. Used only if not specified with  TSPATH
     * CUNIT: unit
     * RINVAL: missing value
     * REXCHANGE: location-parameter. Will be used only if the metadata block does not provide both
     * a location id and a parameter id with the keys TSPATH, SANR or CNAME.
     * The string specified by keyword REXCHANGE represents location Id and also parameter-id (so locations Id and parameter Id equals)
     *
     * @throws IOException if the header format is incorrect
     */
    private void parseHeader() throws IOException {
        this.header.clear();
        this.headerTimeZone = null;

        String tspathPar = null;
        String tspathQual = null;
        String tspathLoc = null;
        String fallbackParLoc = null;

        for (String line; (line = this.reader.readLine()) != null; reader.mark(READ_AHEAD_LIMIT)) {
            line = line.trim();
            // A whitespace-only record becomes empty after trim(); ignore it instead of failing on charAt(0)
            if (line.isEmpty()) continue;
            if (line.charAt(0) != '#') {
                // First record after the metadata block: push it back for the caller
                reader.reset();
                break;
            }

            String tzString = parseKeyValue("TZ", line);
            if (tzString != null) {
                this.headerTimeZone = parseTimeZone(tzString, this.virtualFileName, this.contentHandler.getDefaultTimeZone().getID());
            }

            //Parse location id and parameter specified with keyword TSPATH
            //format: TSPATH/<site id>/<station id>/<parameter shortname>/<ts shortname>
            //example: TSPATH/160/160_1/WATHTE/cmd.p  (contains always all these 4 elements )
            //<ts shortname> is read as qualifier
            String tspath = parseKeyValue("TSPATH", line);
            if (tspath != null) {
                // The value starts with '/', so a valid path is expected to split into 5 parts
                String[] buffer = TextUtils.split(tspath, '/');
                if (buffer.length != 5 || buffer[2].length() < 1 || buffer[3].length() < 1) {
                    throw new IOException("Not a valid wiski file, TSPATH has a incorrect format: " + tspath +
                            "   expected: TSPATH/<site id>/<station id>/<parameter shortname>/<ts shortname>");
                }
                tspathLoc = buffer[2];
                tspathPar = buffer[3];
                tspathQual = buffer[4].replace('.', '_'); // dots are not allowed in fews as internal qualifiers, replace dots with underscores
            }
            String locationId = parseKeyValue("SANR", line);
            if (locationId != null) header.setLocationId(locationId);
            String parameterId = parseKeyValue("CNAME", line);
            if (parameterId != null) header.setParameterId(parameterId);
            String unit = parseKeyValue("CUNIT", line);
            if (unit != null) header.setUnit(unit);
            String missingValue = parseKeyValue("RINVAL", line);
            if (missingValue != null) contentHandler.addMissingValue(missingValue);
            String parLoc = parseKeyValue("REXCHANGE", line);
            if (parLoc != null) fallbackParLoc = parLoc;

        }

        if (tspathPar != null && tspathLoc != null) {
            //If par id, qualifier id and loc are specified with TSPATH, use them, even if the keywords SANR and CNAME are also present in the file
            header.setParameterId(tspathPar);
            header.setQualifierIds(tspathQual);
            header.setLocationId(tspathLoc);
        } else if (header.getParameterId() == null || header.getLocationId() == null) {
            // Either id is missing: REXCHANGE (possibly null) is used for both location and parameter
            header.setParameterId(fallbackParLoc);
            header.setLocationId(fallbackParLoc);
        }
        contentHandler.setTimeSeriesHeader(header);
    }

    /**
     * Extracts the value of a key from a metadata record.
     * <p>
     * The key is only recognised at the start of a field, i.e. at the start of the record or directly
     * after '#', ";*;" or "|*|" (whitespace in between is tolerated). This prevents a short key such as
     * TZ from being matched inside the value of another field. The value runs from the end of the key up
     * to the nearest ";*;" or "|*|" delimiter.
     *
     * @param key    keyword to look for, e.g. "TZ"
     * @param buffer metadata record
     * @return the value, or null if the key is not found in the buffer or the value is not terminated by a delimiter
     */
    private static String parseKeyValue(String key, String buffer) {
        int keyPos = indexOfKeyAtFieldStart(key, buffer);
        if (keyPos == -1) return null;
        int valuePos = keyPos + key.length();
        int endValuePos = indexOfNearestDelimiter(buffer, valuePos);
        if (endValuePos == -1) return null;
        return buffer.substring(valuePos, endValuePos);
    }

    //Returns position of the first occurrence of the key that starts a field, or -1 if there is none
    private static int indexOfKeyAtFieldStart(String key, String buffer) {
        for (int pos = buffer.indexOf(key); pos != -1; pos = buffer.indexOf(key, pos + 1)) {
            // Skip whitespace between the preceding '#'/delimiter and the key
            int prefixEnd = pos;
            while (prefixEnd > 0 && Character.isWhitespace(buffer.charAt(prefixEnd - 1))) prefixEnd--;
            if (prefixEnd == 0) return pos;
            if (buffer.charAt(prefixEnd - 1) == '#') return pos;
            // startsWith returns false for a negative offset, so no explicit range check is needed
            if (buffer.startsWith(DELIMITER_SEMICOLON, prefixEnd - DELIMITER_SEMICOLON.length())) return pos;
            if (buffer.startsWith(DELIMITER_PIPE, prefixEnd - DELIMITER_PIPE.length())) return pos;
        }
        return -1;
    }

    //Returns position of the nearest ";*;" or "|*|" at or after fromIndex, or -1 if there is none
    private static int indexOfNearestDelimiter(String buffer, int fromIndex) {
        int semicolonPos = buffer.indexOf(DELIMITER_SEMICOLON, fromIndex);
        int pipePos = buffer.indexOf(DELIMITER_PIPE, fromIndex);
        if (semicolonPos == -1) return pipePos;
        if (pipePos == -1) return semicolonPos;
        return Math.min(semicolonPos, pipePos);
    }

    /**
     * Parse time zone. Note: UTC always expected, since no other code will occur according to the Wiski 7 format.
     * Allowed formats are: UTC0 and UTC+/-x (e.g. UTC+1 or UTC-2).
     *
     * @param buffer          value of the TZ key
     * @param fileName        file name, used in the warning message only
     * @param defaultTimeZone id of the default time zone, used in the warning message only
     * @return the parsed time zone, or null (after logging a warning) when the value does not start
     *         with UTC followed by at least one character
     * @throws IOException if the text after UTC is not a number
     */
    private static TimeZone parseTimeZone(String buffer, String fileName, String defaultTimeZone) throws IOException {

        if (buffer.indexOf("UTC") != 0 || buffer.length() < 4) {
            log.warn(fileName + ": invalid timezone specified with TZ keyword - " + buffer + " , " + defaultTimeZone + " wil be used.");
            return null;
        }
        // Everything after "UTC" is the offset, e.g. "0", "+1" or "-2"
        String strOffset = buffer.substring(3);
        try {
            double offset = Double.parseDouble(strOffset);
            return TimeZoneUtils.createTimeZoneFromDouble(offset);
        } catch (NumberFormatException e) {
            throw new IOException("Invalid timeZone specified with TZ keyword:" + buffer, e);
        }
    }
}

  • No labels