package nl.wldelft.timeseriesparsers; import nl.wldelft.util.DateUtils; import nl.wldelft.util.TextUtils; import nl.wldelft.util.TimeUnit; import nl.wldelft.util.TimeZoneUtils; import nl.wldelft.util.io.LineReader; import nl.wldelft.util.io.TextParser; import nl.wldelft.util.timeseries.DefaultTimeSeriesHeader; import nl.wldelft.util.timeseries.SimpleEquidistantTimeStep; import nl.wldelft.util.timeseries.TimeSeriesContentHandler; import nl.wldelft.util.timeseries.TimeStep; import org.apache.log4j.Logger; import java.io.IOException; import java.text.ParseException; import java.util.TimeZone; /* * * Imports time series data in a tabular ASCII format from the Australian Bureau of * Meteorology (HCS). The files consist of a set of header lines and then lines * with a fixed number of fields. The fields are separated by a comma and the order is fixed. * Multiple locations and parameters can be put in a single file. * * Example: * # HEADER: Agency Id: BoM * # HEADER: File Generation Date: 2008-08-01T14:30:45z * # HEADER: File Format: BOM-HCS * # HEADER: File Format Version: 2.0 * # HEADER: Generated by (system): TimeStudio * # HEADER: Number of Records: 8 * # HEADER: Local ObsTime Offset: 0 * # HEADER: Data Fields: IndexNo, SensorType, SensorDataType, SiteIdType, SiteId, ObservationTimestamp, RealValue, Unit, SensorTypeParam1, SensorTypeParam2, Quality, Comment * 1,"WL",1,"SLSR","44198-01-01","2008-02-01T01:07:06z",1.150000,"metres","LGH",,1,"" * 2,"WL",1,"SLSR","44198-01-01","2008-02-01T01:08:06z",1.200000,"metres","LGH",,1,"" * 3,"WL",1,"SLSR","44198-01-01","2008-02-01T01:43:06z",1.150000,"metres","LGH",,1,"" * 4,"WL",1,"SLSR","44198-01-01","2008-02-01T01:46:06z",1.200000,"metres","LGH",,1,"" * 5,"WL",1,"SLSR","44198-01-01","2008-02-01T02:04:06z",1.150000,"metres","LGH",,1,"" * 6,"WL",1,"SLSR","44198-01-01","2008-02-01T02:11:06z",1.200000,"metres","LGH",,1,"" * 7,"WL",1,"SLSR","44198-01-01","2008-02-01T02:16:06z",1.150000,"metres","LGH",,1,"" * 8,"WL",1,"SLSR","44198-01-01","2008-02-01T02:22:06z",1.200000,"metres","LGH",,1,"" * */ public class HcsTimeSeriesParser implements TextParser<TimeSeriesContentHandler> { private static final Logger log = Logger.getLogger(HcsTimeSeriesParser.class); private TimeZone defaultTimeZone = null; private LineReader reader = null; private TimeSeriesContentHandler handler = null; @Override public void parse(LineReader reader, String virtualFileName, TimeSeriesContentHandler handler) throws IOException { this.reader = reader; this.handler = handler; handler.addMissingValue(""); DefaultTimeSeriesHeader header = new DefaultTimeSeriesHeader(); reader.setSkipEmptyLines(true); parseHeader(); for (String[] buffer = new String[12]; reader.readLine(',', '\"', buffer) != -1;) { header.setParameterId(buffer[1]); header.setLocationId(buffer[4]); String timeText = buffer[5]; if (TextUtils.trimToNull(timeText) == null) continue; TimeZone timeZone = defaultTimeZone; if (TextUtils.endsWithIgnoreCase(timeText, "Z")) { timeZone = DateUtils.GMT; timeText = timeText.substring(0, timeText.length() - 1); } handler.setTime(timeZone, "yyyy-MM-dd'T'HH:mm:ss", timeText); handler.setValue('.', buffer[6]); header.setUnit(buffer[7]); header.setTimeStep(parseTimeStep(buffer, timeZone)); handler.setFlag(buffer[10]); handler.setComment(buffer[11]); handler.setTimeSeriesHeader(header); handler.applyCurrentFields(); } } private TimeStep parseTimeStep(String[] buffer, TimeZone timeZone) { if (TextUtils.trimToNull(buffer[9]) == null) return null; if (TextUtils.equals(buffer[2], "1")) return null; try { return SimpleEquidistantTimeStep.getInstanceFromTimeZone(TextUtils.parseInt(buffer[9]) * TimeUnit.SECOND_MILLIS, timeZone); } catch (NumberFormatException e) { log.error("Can not parse time step " + buffer[9] + " at " + reader.getFileAndLineNumber()); return null; } } private void parseHeader() throws IOException { defaultTimeZone = handler.getDefaultTimeZone(); reader.mark(200); for (String line; (line = this.reader.readLine()) != null; reader.mark(200)) { line = line.trim(); if (line.charAt(0) != '#') { // this is not a header row, undo read line reader.reset(); break; } // Supported formats: // # HEADER: Local ObsTime Offset: 1:30 // # HEADER: Local ObsTime Offset: +01:30 // # HEADER: Local ObsTime Offset: 9 // # HEADER: Local ObsTime Offset: -01:00 String[] elements = TextUtils.split(line, ':'); if (elements.length < 3) continue; TextUtils.trimElements(elements); if (!elements[1].equalsIgnoreCase("Local ObsTime Offset")) continue; String timeZone = elements.length >= 4 ? elements[2] + ':' + elements[3] : elements[2] + ':' + "00"; timeZone = timeZone.length() <= 4 ? '0' + timeZone : timeZone; timeZone = timeZone.charAt(0) == '+' || timeZone.charAt(0) == '-' ? timeZone : '+' + timeZone; try { defaultTimeZone = TimeZoneUtils.parseTimeZone("GMT" + timeZone); } catch (ParseException e) { log.error("Can not parse time zone " + timeZone + " at " + reader.getFileAndLineNumber()); } } } }