Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.
Code Block
package nl.wldelft.timeseriesparsers;

import nl.wldelft.util.DateUtils;
import nl.wldelft.util.TextUtils;
import nl.wldelft.util.TimeUnit;
import nl.wldelft.util.TimeZoneUtils;
import nl.wldelft.util.io.LineReader;
import nl.wldelft.util.io.TextParser;
import nl.wldelft.util.timeseries.DefaultTimeSeriesHeader;
import nl.wldelft.util.timeseries.SimpleEquidistantTimeStep;
import nl.wldelft.util.timeseries.TimeSeriesContentHandler;
import nl.wldelft.util.timeseries.TimeStep;
import org.apache.log4j.Logger;

import java.io.IOException;
import java.text.ParseException;
import java.util.TimeZone;

/*
 * 
 * Imports time series data in a tabular ASCII format from the Australian Bureau of
 * Meteorology (HCS). The files consist of a set of header lines and then lines 
 * with a fixed number of fields. The fields are separated by a comma and the order is fixed. 
 * Multiple locations and parameters can be put in a single file.
 * 
 * Example: 
 * # HEADER: Agency Id: BoM
 * # HEADER: File Generation Date: 2008-08-01T14:30:45z
 * # HEADER: File Format: BOM-HCS
 * # HEADER: File Format Version: 2.0
 * # HEADER: Generated by (system): TimeStudio
 * # HEADER: Number of Records: 8
 * # HEADER: Local ObsTime Offset: 0
 * # HEADER: Data Fields: IndexNo, SensorType, SensorDataType, SiteIdType, SiteId, ObservationTimestamp, RealValue, Unit, SensorTypeParam1, SensorTypeParam2, Quality, Comment
 * 1,"WL",1,"SLSR","44198-01-01","2008-02-01T01:07:06z",1.150000,"metres","LGH",,1,""
 * 2,"WL",1,"SLSR","44198-01-01","2008-02-01T01:08:06z",1.200000,"metres","LGH",,1,""
 * 3,"WL",1,"SLSR","44198-01-01","2008-02-01T01:43:06z",1.150000,"metres","LGH",,1,""
 * 4,"WL",1,"SLSR","44198-01-01","2008-02-01T01:46:06z",1.200000,"metres","LGH",,1,""
 * 5,"WL",1,"SLSR","44198-01-01","2008-02-01T02:04:06z",1.150000,"metres","LGH",,1,""
 * 6,"WL",1,"SLSR","44198-01-01","2008-02-01T02:11:06z",1.200000,"metres","LGH",,1,""
 * 7,"WL",1,"SLSR","44198-01-01","2008-02-01T02:16:06z",1.150000,"metres","LGH",,1,""
 * 8,"WL",1,"SLSR","44198-01-01","2008-02-01T02:22:06z",1.200000,"metres","LGH",,1,""
 * 
 */

/**
 * Parser for comma separated BOM-HCS time series files.
 *
 * <p>The file starts with header lines that begin with {@code #}, followed by data rows that are
 * read into twelve fields. The fields used by this parser are (zero based): 1 = parameter id,
 * 2 = sensor data type, 4 = location id, 5 = observation time stamp, 6 = value, 7 = unit,
 * 9 = time step in seconds, 10 = flag and 11 = comment.
 *
 * <p>The only header that is interpreted is {@code Local ObsTime Offset}; it replaces the default
 * time zone of the content handler for time stamps that do not end with {@code Z}.
 *
 * <p>An instance keeps the reader and handler of the current {@link #parse} call in fields.
 */
public class HcsTimeSeriesParser implements TextParser<TimeSeriesContentHandler> {
    private static final Logger log = Logger.getLogger(HcsTimeSeriesParser.class);

    /** Number of comma separated fields read from every data row. */
    private static final int FIELD_COUNT = 12;

    /** Time zone applied to time stamps without a trailing 'Z'; set while reading the header. */
    private TimeZone defaultTimeZone = null;
    private LineReader reader = null;
    private TimeSeriesContentHandler handler = null;

    /**
     * Reads the header lines and then passes every data row to the content handler.
     *
     * @param reader          source of the lines to parse
     * @param virtualFileName not used by this parser
     * @param handler         receives the header, time, value, flag and comment of every row
     * @throws IOException when reading from {@code reader} fails
     */
    @Override
    public void parse(LineReader reader, String virtualFileName, TimeSeriesContentHandler handler) throws IOException {
        this.reader = reader;
        this.handler = handler;

        handler.addMissingValue("");
        DefaultTimeSeriesHeader header = new DefaultTimeSeriesHeader();
        reader.setSkipEmptyLines(true);

        parseHeader();

        // The same header object is updated and handed to the handler again for every row.
        for (String[] buffer = new String[FIELD_COUNT]; reader.readLine(',', '\"', buffer) != -1;) {
            header.setParameterId(buffer[1]);
            header.setLocationId(buffer[4]);
            String timeText = buffer[5];
            if (TextUtils.trimToNull(timeText) == null) {
                // rows without a time stamp are skipped
                continue;
            }
            TimeZone timeZone = defaultTimeZone;
            if (TextUtils.endsWithIgnoreCase(timeText, "Z")) {
                // a trailing 'Z' marks the time stamp as GMT and overrules the default time zone
                timeZone = DateUtils.GMT;
                timeText = timeText.substring(0, timeText.length() - 1);
            }
            handler.setTime(timeZone, "yyyy-MM-dd'T'HH:mm:ss", timeText);
            handler.setValue('.', buffer[6]);
            header.setUnit(buffer[7]);
            header.setTimeStep(parseTimeStep(buffer, timeZone));
            handler.setFlag(buffer[10]);
            handler.setComment(buffer[11]);
            handler.setTimeSeriesHeader(header);
            handler.applyCurrentFields();
        }
    }

    /**
     * Derives the time step of a data row.
     *
     * @param buffer   fields of the current data row
     * @param timeZone time zone that applies to the current row
     * @return an equidistant time step of {@code buffer[9]} seconds, or {@code null} when field 9
     *         is blank, when field 2 equals {@code "1"} or when field 9 is not a valid integer
     */
    private TimeStep parseTimeStep(String[] buffer, TimeZone timeZone) {
        if (TextUtils.trimToNull(buffer[9]) == null) {
            return null;
        }
        if (TextUtils.equals(buffer[2], "1")) {
            return null;
        }
        try {
            return SimpleEquidistantTimeStep.getInstanceFromTimeZone(TextUtils.parseInt(buffer[9]) * TimeUnit.SECOND_MILLIS, timeZone);
        } catch (NumberFormatException e) {
            // keep the cause in the log instead of dropping it
            log.error("Can not parse time step " + buffer[9] + " at  " + reader.getFileAndLineNumber(), e);
            return null;
        }
    }

    /**
     * Consumes the leading lines that start with '#' and picks up the local observation time
     * offset when present. The reader is positioned back at the first line that is not a header.
     *
     * @throws IOException when reading from, marking or resetting the reader fails
     */
    private void parseHeader() throws IOException {
        defaultTimeZone = handler.getDefaultTimeZone();
        // NOTE(review): reset() relies on the first data line fitting in the read-ahead limit of
        // 200 characters passed to mark() - confirm how LineReader behaves for longer lines.
        reader.mark(200);
        for (String line; (line = this.reader.readLine()) != null; reader.mark(200)) {
            line = line.trim();
            if (line.isEmpty()) {
                // a line with only white space has no first character to inspect
                continue;
            }
            if (line.charAt(0) != '#') {
                //  this is not a header row, undo read line
                reader.reset();
                break;
            }

            // Supported formats:
            // # HEADER: Local ObsTime Offset: 1:30
            // # HEADER: Local ObsTime Offset: +01:30
            // # HEADER: Local ObsTime Offset: 9
            // # HEADER: Local ObsTime Offset: -01:00
            String[] elements = TextUtils.split(line, ':');
            if (elements.length < 3) {
                continue;
            }
            TextUtils.trimElements(elements);
            if (!elements[1].equalsIgnoreCase("Local ObsTime Offset")) {
                continue;
            }

            String minutes = elements.length >= 4 ? elements[3] : "00";
            String timeZone = formatOffset(elements[2], minutes);
            try {
                defaultTimeZone = TimeZoneUtils.parseTimeZone("GMT" + timeZone);
            } catch (ParseException e) {
                log.error("Can not parse time zone " + timeZone + " at  " + reader.getFileAndLineNumber(), e);
            }
        }
    }

    /**
     * Builds a signed offset text from the hour and minute part of the offset header.
     *
     * <p>The sign is split off before the hour is padded, so that a signed single digit hour such
     * as {@code -9} results in {@code -09:00}.
     *
     * @param hours   hour part, optionally starting with '+' or '-'
     * @param minutes minute part, appended unchanged
     * @return the offset with an explicit sign and an hour part of at least two characters
     */
    private static String formatOffset(String hours, String minutes) {
        char sign = '+';
        String hourDigits = hours;
        if (!hours.isEmpty() && (hours.charAt(0) == '+' || hours.charAt(0) == '-')) {
            sign = hours.charAt(0);
            hourDigits = hours.substring(1);
        }
        if (hourDigits.length() < 2) {
            hourDigits = '0' + hourDigits;
        }
        return sign + hourDigits + ':' + minutes;
    }
}