Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.
Code Block
/* ================================================================
 * Delft FEWS
 * ================================================================
 *
 * Project Info:  http://www.wldelft.nl/soft/fews/index.html
 * Project Lead:  Karel Heynert (karel.heynert@wldelft.nl)
 *
 * (C) Copyright 2003, by WL | Delft Hydraulics
 *                        P.O. Box 177
 *                        2600 MH  Delft
 *                        The Netherlands
 *                        http://www.wldelft.nl
 *
 * DELFT-FEWS is a sophisticated collection of modules designed
 * for building a FEWS customised to the specific requirements
 * of individual agencies. An open modelling approach allows users
 * to add their own modules in an efficient way.
 *
 * ----------------------------------------------------------------
 * ShefTimeSeriesParser
 * ----------------------------------------------------------------
 * (C) Copyright 2003, by WL | Delft Hydraulics
 *
 * Original Author:  Arjen Markus
 */

package nl.wldelft.fews.system.plugin.dataImport;

import nl.wldelft.util.ExceptionUtils;
import nl.wldelft.util.TextUtils;
import nl.wldelft.util.io.FileParser;
import nl.wldelft.util.timeseries.DefaultTimeSeriesHeader;
import nl.wldelft.util.timeseries.RelativeEquidistantTimeStep;
import nl.wldelft.util.timeseries.TimeSeriesContentHandler;
import org.apache.log4j.Logger;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.List;
import java.util.regex.Pattern;


/**
 * Time series parser for SHEF (Standard Hydrometeorological Exchange Format) version 2.0 files.
 * <p/>
 * Note: only the .E/.ER and .A/.AR messages (and their numbered continuation lines) are
 * implemented; every other line is ignored. See:
 * http://www.nws.noaa.gov/os/hod/SHManual/SHMan051_shef.htm
 * http://www.nws.noaa.gov/om/water/resources/SHEF_CodeManual_5July2012.pdf
 * <p/>
 * The parser keeps its parsing state in instance fields, so a single instance must not be
 * used to parse several files at the same time.
 */
public class ShefTimeSeriesParser implements FileParser<TimeSeriesContentHandler> {
    private static final Logger log = Logger.getLogger(ShefTimeSeriesParser.class);

    public static final String readerType = "SHEF";
    private static final String SINGLE_LOCATION_MULTIPLE_PARAMETERS_TYPE = ".A";
    private static final String SINGLE_LOCATION_MULTIPLE_PARAMETERS_CONTINUATION_TYPE = ".AR";
    // Header lines: .E, .ER, .A or .AR (both dots are literal dots).
    private static final Pattern COMPILE_PATTERN_ER_AR = Pattern.compile("\\.ER?$|\\.AR?$");
    private static final Pattern COMPILE_PATTERN_ER = Pattern.compile("\\.ER?$");
    // Continuation lines: header code followed by a sequence number.
    private static final Pattern COMPILE_PATTERN_ER_D_AR_D = Pattern.compile("\\.ER?\\d+|\\.AR?\\d+");
    private static final Pattern COMPILE_PATTERN_D_STAR = Pattern.compile("DS.*|DN.*|DH.*|DD.*|DM.*|DY.*|DJ.*|DR.*");
    // Any D-prefixed code, or an empty field.
    private static final Pattern COMPILE_PATTERN_D = Pattern.compile("D.*|");
    private static final Pattern COMPILE_PATTERN_DI = Pattern.compile("DI.*");
    private static final Pattern COMPILE_PATTERN_DH_DD = Pattern.compile("DH\\d\\d.*");
    // Matches every character that is NOT a digit; used to strip a code down to its number.
    private static final Pattern COMPILE_PATTERN_START_DIGIT = Pattern.compile("[^0-9]");
    private static final Pattern COMPILE_PATTERN_DIH = Pattern.compile(".*DIH.*");
    private static final Pattern COMPILE_PATTERN_DIN = Pattern.compile(".*DIN.*");
    private static final Pattern COMPILE_PATTERN_DID = Pattern.compile(".*DID.*");
    private static final Pattern COMPILE_PATTERN_WHITESPACE = Pattern.compile("\\s+");

    private final char quoteChar = '\"';

    // Scratch calendar used to convert parsed date fields to epoch milliseconds.
    // Its time zone is set to the content handler's default time zone in parse().
    private final Calendar calendar = new GregorianCalendar();

    private TimeSeriesContentHandler contentHandler = null;

    // Variables for parsing the file
    // time:  time stamp (epoch millis) of the next value to store
    // dtime: time step (millis) between consecutive values of the current message
    private long time = 0;
    private long dtime = 0;
    // true when the data text of the previously handled line ended with a '/' separator
    private boolean separatorOnLastLine = false;

    private static final int E_TYPE = 0;
    private static final int A_TYPE = 1;
    private int messageType = E_TYPE;


    // number of space separated fields before the first '/' separator of the current line
    private int firstSlashSeparatorIdx = 0;


    /**
     * Parses the given SHEF file and passes all headers, times and values to the content handler.
     *
     * @param file           the SHEF file to read
     * @param contentHandler receiver of the parsed time series
     * @throws IOException when the file cannot be opened, cannot be read or is not recognised
     */
    @Override
    public void parse(File file, TimeSeriesContentHandler contentHandler) throws IOException {
        this.contentHandler = contentHandler;
        calendar.setTimeZone(this.contentHandler.getDefaultTimeZone());

        // Start every file with a clean state, so a reused parser instance is not
        // influenced by the file it parsed before.
        time = 0;
        dtime = 0;
        separatorOnLastLine = false;
        messageType = E_TYPE;
        firstSlashSeparatorIdx = 0;

        if (!readFile(file)) {
            throw new IOException("Error parsing: " + file.getName());
        }
    }

    /**
     * Opens the file, parses it and closes it again. Problems are logged.
     *
     * @return true when the file was opened and parsed successfully
     */
    private boolean readFile(File file) {
        BufferedReader reader;
        try {
            //noinspection resource
            reader = new BufferedReader(new FileReader(file));
        } catch (FileNotFoundException e) {
            log.error(file + " could not be opened.", e);
            return false;
        }

        try {
            if (parseFile(reader)) {
                return true;
            }
            log.error("The file " + file + " has unknown format.");
            return false;
        } catch (IOException e) {
            log.error("Error while reading the file " +
                    file + " : " + ExceptionUtils.getMessage(e), e);
            return false;
        } finally {
            // Also reached when parsing fails with an unchecked exception,
            // so the file handle is never leaked.
            closeReader(reader);
        }
    }

    /**
     * Closes the reader; a failure to close is logged and otherwise ignored.
     */
    private static void closeReader(BufferedReader reader) {
        try {
            reader.close();
        } catch (IOException e) {
            log.error("Cannot close file " + reader +
                    " : " + ExceptionUtils.getMessage(e), e);
        }
    }


    /**
     * Reads the file line by line and passes the content to the content handler.
     * Comments on the SHEF file format:
     * - The method recognises .E/.ER and .A/.AR header lines and their numbered
     * continuation lines (.E1, .ER2, .A1, ...)
     * - All other lines are ignored at the moment
     * <p/>
     * Note:
     * We assume that the fields before the first '/' are separated by spaces and that
     * all following fields are separated by '/'.
     *
     * @return false as soon as a header line could not be interpreted, true otherwise
     * @throws IOException when reading from the reader fails
     */
    private boolean parseFile(BufferedReader reader) throws IOException {

        boolean okay = true;
        String line;

        while ((line = reader.readLine()) != null && okay) {
            String commentFreeLine = removeCommentsFromLine(line).toString();
            String[] pieces = TextUtils.split(commentFreeLine, ':', '\0', quoteChar, false);
            if (pieces.length == 0) {
                continue;
            }
            String dataText = pieces[0];

            // get fields, first part is split by space next part by '/'

            // first split with separator '/' will split up line in first part containing spaces (position part)
            // followed by the datastring fields (separated by '/')
            String[] tmpPieces = dataText.split("/");
            if (tmpPieces.length == 0) {
                // the line holds nothing but '/' separators
                continue;
            }
            String[] positionFields = TextUtils.split(tmpPieces[0], ' ', '\"');
            firstSlashSeparatorIdx = positionFields.length;
            String[] fields = new String[tmpPieces.length + positionFields.length - 1];
            System.arraycopy(positionFields, 0, fields, 0, positionFields.length);
            System.arraycopy(tmpPieces, 1, fields, positionFields.length, tmpPieces.length - 1);
            if (fields.length == 0) {
                continue;
            }

            //Header lines start with .E, .ER, .A or .AR (but may contain data)
            //Data-only lines start with .Ed, .ERd, .Ad or .ARd - d a digit
            if (COMPILE_PATTERN_ER_AR.matcher(fields[0]).matches()) {
                okay = parseHeaderLine(fields);
            } else if (COMPILE_PATTERN_ER_D_AR_D.matcher(fields[0]).matches()) {
                okay = parseContinuationLine(fields, positionFields.length);
            }

            // Look at the parsed data text (comments and trailing blanks removed) instead of
            // the raw line, otherwise trailing whitespace would hide the final separator.
            separatorOnLastLine = dataText.trim().endsWith("/");
        }

        return okay;
    }

    /**
     * Handles a .E/.ER/.A/.AR header line, including any values that are on the same line.
     *
     * @param fields all fields of the line, fields[0] is the message code
     * @return false when the header could not be interpreted
     */
    private boolean parseHeaderLine(String[] fields) {
        this.messageType = COMPILE_PATTERN_ER.matcher(fields[0]).matches() ? E_TYPE : A_TYPE;

        // bug in OHD output for .A messages. missing '/' slash between parameter id and
        // value. Remove this when fixed
        if (this.messageType == A_TYPE) {
            String[] splitfields = TextUtils.split(fields[fields.length - 1], ' ', '\"');
            if (splitfields.length > 1) {
                String[] tmpFields = new String[fields.length + 1];
                System.arraycopy(fields, 0, tmpFields, 0, fields.length - 1);
                System.arraycopy(splitfields, 0, tmpFields, fields.length - 1, 2);
                fields = tmpFields;
            }
        }

        if (SINGLE_LOCATION_MULTIPLE_PARAMETERS_TYPE.equals(fields[0])) {
            // no headers all data is on one line with multiple parameters.
            parseMultiParameterSeriesData(fields);
            return true;
        }

        // .E/.ER message, or .AR type: revision on earlier measurement.
        // Assumption is that only one parameter at a time is passed.
        if (!getSeriesParameters(fields)) {
            return false;
        }

        // see if there are any values on this row, if so start fill series data
        // because parameter and timestep are mandatory values can be started
        // from firstSlashSeparator until end of fields
        if (fields.length == (firstSlashSeparatorIdx + 2)
                && SINGLE_LOCATION_MULTIPLE_PARAMETERS_CONTINUATION_TYPE.equals(fields[0])) {
            return getContinuationSeriesData(fields[fields.length - 1]);
        }

        // The values start at the first field that can be read as a number.
        for (int i = firstSlashSeparatorIdx + 2; i < fields.length; i++) {
            if (isFloat(fields[i])) {
                String[] values = new String[fields.length - i];
                System.arraycopy(fields, i, values, 0, values.length);
                return getSeriesData(values);
            }
        }
        return true;
    }

    /**
     * Handles a numbered continuation line (.E1, .ER2, .A1, ...), which only contains values.
     *
     * @param fields             all fields of the line, fields[0] is the continuation code
     * @param positionFieldCount number of space separated fields before the first '/'
     * @return the result of storing the values
     */
    private boolean parseContinuationLine(String[] fields, int positionFieldCount) {
        String[] datafields;
        if (positionFieldCount == 1 && separatorOnLastLine) {
            // slash between rowcontinuation and first value. If previous line ended with a slash
            // a null value is assumed
            fields[0] = null;
            datafields = fields;
        } else {
            // skip first column with normal linecontinuation
            datafields = new String[fields.length - 1];
            System.arraycopy(fields, 1, datafields, 0, datafields.length);
        }
        return getSeriesData(datafields);
    }

    /**
     * Returns the line with the comment pieces removed.
     * A line without any colon is returned unchanged. Otherwise the line is split on colons
     * and every second piece is kept, starting with the first piece unless the line itself
     * starts with a colon.
     * see: http://www.nws.noaa.gov/om/water/resources/SHEF_CodeManual_5July2012.pdf
     */
    private StringBuilder removeCommentsFromLine(String line) {
        StringBuilder commentFreeLine = new StringBuilder(line.length());
        if (!line.contains(":")) {
            commentFreeLine.append(line);
            return commentFreeLine;
        }

        String[] piecesWithComments = TextUtils.split(line, ':', ':', quoteChar, true);
        // if the line starts with a : , the first entry should be skipped.
        int dataPosition = line.startsWith(":") ? 1 : 0;
        for (int i = 0; i < piecesWithComments.length; i++) {
            if (i % 2 == dataPosition) {
                // the comment toggle is off.
                commentFreeLine.append(piecesWithComments[i]);
            }
        }
        return commentFreeLine;
    }

    /**
     * @return true when TextUtils.parseFloat accepts the text
     */
    private static boolean isFloat(String value) {
        try {
            TextUtils.parseFloat(value);
            return true;
        } catch (NumberFormatException e) {
            return false;
        }
    }

    /**
     * Returns the first observation time code (DS, DN, DH, DD, DM, DY, DJ or DR prefix)
     * found at field index 3 or later, or an empty string when there is none.
     */
    private static String findObservationTime(String[] fields) {
        for (int i = 3; i < fields.length; i++) {
            if (COMPILE_PATTERN_D_STAR.matcher(fields[i]).matches()) {
                return fields[i];
            }
        }
        return "";
    }

    /**
     * Returns the index of the parameter code: the first field at or after the first '/'
     * separator that is not empty and does not start with a D. Returns fields.length when
     * there is no such field.
     */
    private int findFirstParameterIndex(String[] fields) {
        int ifield = firstSlashSeparatorIdx;
        while (ifield < fields.length && COMPILE_PATTERN_D.matcher(fields[ifield]).matches()) {
            ifield++;
        }
        return ifield;
    }

    /**
     * Handles a .A message: one location, one time and a list of parameter code / value pairs.
     * Lines that cannot be interpreted are skipped with a warning.
     */
    private void parseMultiParameterSeriesData(String[] fields) {
        // .A ANAW1 20170215 P DH2400 /DH08 /HGIRX 8.37 /QRIRX 41.69
        // Counting fields from 0:
        // Field 1 is the name of the location
        // Field 2 is the date (possibly without a year)

        // Skip all /D parts.

        // parameter code 1
        // parameter value 1
        // ..
        // parameter code N
        // parameter value  N

        if (fields.length < 3) {
            log.warn("SHEF import line of type .A (single station, multiple parameters) doesn't contain a location and a date. Skipping line");
            return;
        }

        String locationId = fields[1];
        // get time, as a combination of date, [observation time]
        time = parseDate(fields[2], findObservationTime(fields));
        if (time == 0) {
            log.warn("SHEF import line of type .A (single station, multiple parameters) doesn't contain a valid date. Skipping line");
            return;
        }

        DefaultTimeSeriesHeader timeSeriesHeader = new DefaultTimeSeriesHeader();
        timeSeriesHeader.setLocationId(locationId);

        // Collect the parameter codes and values. A code and its value can be in one field
        // (separated by blanks) or in two fields; blanks around them are not significant.
        List<String> parameterValueList = new ArrayList<>();
        for (int i = findFirstParameterIndex(fields); i < fields.length; i++) {
            if (fields[i] == null) {
                continue;
            }
            for (String token : COMPILE_PATTERN_WHITESPACE.split(fields[i])) {
                if (!token.isEmpty()) {
                    parameterValueList.add(token);
                }
            }
        }
        if (parameterValueList.size() % 2 != 0) {
            log.warn("SHEF import line of type .A (single station, multiple parameters) doesn't contain a consistent number of parameters and values. Skipping line");
            return;
        }
        for (int i = 0; i < parameterValueList.size(); i += 2) {
            String paramId = parameterValueList.get(i);
            float value = parseValue(parameterValueList.get(i + 1));
            timeSeriesHeader.setParameterId(paramId);
            contentHandler.setTimeSeriesHeader(timeSeriesHeader);
            contentHandler.setTime(time);
            contentHandler.setValue(value);
            contentHandler.applyCurrentFields();
        }
    }

    /**
     * Interprets the header part of a .E/.ER/.AR line: sets the start time and (for .E
     * messages) the time step, and passes the time series header to the content handler.
     *
     * @return false when the line has no location/date, the date is invalid or, for .E
     * messages, the time step is missing or not recognised
     */
    private boolean getSeriesParameters(String[] fields) {
        // Counting fields from 0:
        // Field 1 is the name of the location
        // Field 2 is the date (possibly without a year)
        // Field 3 is the timezone (optional!)
        // observation time (optional)
        // creation date (optional)
        // units code (optional)
        // Data string qualifier (optional)
        // Duration code (optional)
        // parameter code
        // the time interval (only for .E messages)

        if (fields.length < 3) {
            log.warn("SHEF import header line " + fields[0] + " doesn't contain a location and a date.");
            return false;
        }

        String locationId = fields[1];

        // The start date/time, as a combination of date, [observation time]
        time = parseDate(fields[2], findObservationTime(fields));
        // Never reuse the time step of a previous message.
        dtime = 0;

        // get parameter id, i.e. the first field without a D in prefix
        int ifield = findFirstParameterIndex(fields);
        String parameterId = ifield < fields.length ? fields[ifield] : null;

        // get timestep field, mandatory only for .E messages
        for (; ifield < fields.length; ifield++) {
            if (COMPILE_PATTERN_DI.matcher(fields[ifield]).matches()) {
                dtime = parseTimeStep(fields[ifield]);
                break;
            }
        }

        if (time == 0) {
            log.warn("SHEF import header line " + fields[0] + " for location " + locationId + " doesn't contain a valid date.");
            return false;
        }
        if (this.messageType == E_TYPE && dtime == 0) {
            log.warn("SHEF import header line " + fields[0] + " for location " + locationId + " doesn't contain a valid time interval (DIH, DIN or DID).");
            return false;
        }

        DefaultTimeSeriesHeader timeSeriesHeader = new DefaultTimeSeriesHeader();
        timeSeriesHeader.setLocationId(locationId);
        timeSeriesHeader.setParameterId(parameterId);
        if (this.messageType == E_TYPE) {
            timeSeriesHeader.setTimeStep(RelativeEquidistantTimeStep.getInstance(dtime, time));
            timeSeriesHeader.setForecastTime(time);
        }
        contentHandler.setTimeSeriesHeader(timeSeriesHeader);
        return true;
    }

    /**
     * Stores one value at the current time in the content handler.
     */
    private void storeValue(String valueText) {
        contentHandler.setTime(time);
        contentHandler.setValue(parseValue(valueText));
        contentHandler.applyCurrentFields();
    }

    /**
     * Stores the values as consecutive time steps, the first one at the current time.
     * The current time is advanced by the time step after every value.
     */
    private boolean getSeriesData(String[] fields) {
        for (String field : fields) {
            storeValue(field);
            time += dtime;
        }
        return true;
    }

    /**
     * Stores a single value at the current time without advancing the time.
     */
    private boolean getContinuationSeriesData(String valueString) {
        storeValue(valueString);
        return true;
    }


    /**
     * Converts a SHEF date (ccyymmdd, yymmdd or mmdd) and an optional DH time code
     * (DHhh, DHhhmm or DHhhmmss) to epoch milliseconds in the time zone of the calendar.
     * <p/>
     * Missing century: the century is chosen such that the year is at most 10 years in the
     * future and at most 90 years in the past. Missing year: the year is chosen such that the
     * month is at most 5 months after and 6 months before the current month.
     * Time codes other than DH are ignored.
     *
     * @param str     the date field
     * @param timestr the observation time field, may be empty
     * @return the time in epoch milliseconds, or 0 when the date can not be interpreted
     */
    private long parseDate(String str, String timestr) {

        if (str.length() < 4) {
            log.warn("SHEF import: date " + str + " is too short.");
            return 0;
        }

        // Reference for the missing year/century. A new calendar is needed because the shared
        // calendar holds the previously parsed date instead of the current date.
        Calendar now = new GregorianCalendar(calendar.getTimeZone());
        int currentYear = now.get(Calendar.YEAR);
        int currentMonth = now.get(Calendar.MONTH) + 1; // Calendar months start at 0

        int year;
        int month;
        int day;
        int hour = 0;
        int minute = 0;
        int second = 0;

        try {
            if (str.length() == 8) {
                year = Integer.parseInt(str.substring(0, 4));
                month = Integer.parseInt(str.substring(4, 6));
                day = Integer.parseInt(str.substring(6, 8));
            } else if (str.length() == 6) {
                year = 100 * (currentYear / 100) + Integer.parseInt(str.substring(0, 2));
                if (year > currentYear + 10) {
                    year -= 100;
                } else if (year < currentYear - 90) {
                    year += 100;
                }
                month = Integer.parseInt(str.substring(2, 4));
                day = Integer.parseInt(str.substring(4, 6));
            } else {
                // only month and day are given: take the year that brings the date
                // nearest to the current date
                year = currentYear;
                month = Integer.parseInt(str.substring(0, 2));
                day = Integer.parseInt(str.substring(2, 4));
                if (month > currentMonth + 5) {
                    year--;
                } else if (month < currentMonth - 6) {
                    year++;
                }
            }

            // The time string (actually a composite thing)
            // We expect something like: DH12/... If not, ignore this field
            // -- AM: TODO!
            // Fields after the first '/' can still carry blanks, so trim first.
            String timeCode = timestr.trim();
            if (COMPILE_PATTERN_DH_DD.matcher(timeCode).matches()) {
                hour = Integer.parseInt(timeCode.substring(2, 4));
                if (timeCode.length() >= 6) {
                    minute = TextUtils.tryParseInt(timeCode.substring(4, 6), 0);
                }
                if (timeCode.length() >= 8) {
                    second = TextUtils.tryParseInt(timeCode.substring(6, 8), 0);
                }
            }
        } catch (NumberFormatException e) {
            log.warn("SHEF import: date " + str + " is not a valid date.");
            return 0;
        }

        calendar.clear();
        //noinspection MagicConstant
        calendar.set(year, month - 1, day, hour, minute, second); /* Correct for the offset: month numbers start at 0*/
        return calendar.getTimeInMillis();
    }

    /**
     * Converts a SHEF time interval code (DIHnn = hours, DINnn = minutes, DIDnn = days)
     * to milliseconds.
     *
     * @return the interval in milliseconds, or 0 when the unit is not supported or the
     * code contains no usable number
     */
    private static long parseTimeStep(String str) {

        String digits = COMPILE_PATTERN_START_DIGIT.matcher(str).replaceAll("");
        if (digits.isEmpty()) {
            return 0;
        }

        long value;
        try {
            value = Long.parseLong(digits);
        } catch (NumberFormatException e) {
            // more digits than fit in a long
            return 0;
        }

        long scale;
        if (COMPILE_PATTERN_DIH.matcher(str).matches()) {
            scale = 3600L * 1000L;
        } else if (COMPILE_PATTERN_DIN.matcher(str).matches()) {
            scale = 60L * 1000L;
        } else if (COMPILE_PATTERN_DID.matcher(str).matches()) {
            scale = 86400L * 1000L;
        } else {
            scale = 0;
        }

        return scale * value;
    }

    /**
     * Converts a value field to a float.
     *
     * @return the value, or NaN when the field is null, blank, a missing value marker
     * (+, -, m, mm, M, MM, -9999) or not a number
     */
    private static float parseValue(String valueText) {

        if (valueText == null) {
            return Float.NaN;
        }
        String text = valueText.trim();
        if (text.isEmpty()) {
            return Float.NaN;
        }

        // Missing value can be marked +, -, m, mm, M, MM, -9999
        boolean missing = text.equalsIgnoreCase("M") || text.equalsIgnoreCase("MM")
                || text.equalsIgnoreCase("-") || text.equalsIgnoreCase("+")
                || text.equalsIgnoreCase("-9999");
        if (missing) {
            return Float.NaN;
        }

        try {
            return TextUtils.parseFloat(text);
        } catch (NumberFormatException e) {
            // TODO : According to the specs of SHEF:
            // If no legitimate value is found, the value is treated as a null field or no report.
            // So we should return missingValue
            return Float.NaN;
        }
    }
}