package nl.wldelft.timeseriesparsers;

import nl.wldelft.util.TextUtils;
import nl.wldelft.util.TimeUnit;
import nl.wldelft.util.TimeZoneUtils;
import nl.wldelft.util.io.LineReader;
import nl.wldelft.util.io.TextParser;
import nl.wldelft.util.timeseries.DefaultTimeSeriesHeader;
import nl.wldelft.util.timeseries.SimpleEquidistantTimeStep;
import nl.wldelft.util.timeseries.TimeSeriesContentHandler;
import nl.wldelft.util.timeseries.TimeStep;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;

import java.io.IOException;
import java.text.ParseException;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/*
*
* Imports time series data in a tabular ASCII format from the Australian Bureau of
* Meteorology (HCS). The files consist of a set of header lines and then lines
* with a fixed number of fields. The fields are separated by a comma and the order is fixed.
* Multiple locations and parameters can be put in a single file.
*
* Example:
* # HEADER: Agency Id: BoM
* # HEADER: File Generation Date: 2008-08-01T14:30:45z
* # HEADER: File Format: BOM-HCS
* # HEADER: File Format Version: 2.0
* # HEADER: Generated by (system): TimeStudio
* # HEADER: Number of Records: 8
* # HEADER: Local ObsTime Offset: 0
* # HEADER: Data Fields: IndexNo, SensorType, SensorDataType, SiteIdType, SiteId, ObservationTimestamp, RealValue, Unit, SensorTypeParam1, SensorTypeParam2, Quality, Comment
* 1,"WL",1,"SLSR","44198-01-01","2008-02-01T01:07:06z",1.150000,"metres","LGH",,1,""
* 2,"WL",1,"SLSR","44198-01-01","2008-02-01T01:08:06z",1.200000,"metres","LGH",,1,""
* 3,"WL",1,"SLSR","44198-01-01","2008-02-01T01:43:06z",1.150000,"metres","LGH",,1,""
* 4,"WL",1,"SLSR","44198-01-01","2008-02-01T01:46:06z",1.200000,"metres","LGH",,1,""
* 5,"WL",1,"SLSR","44198-01-01","2008-02-01T02:04:06z",1.150000,"metres","LGH",,1,""
* 6,"WL",1,"SLSR","44198-01-01","2008-02-01T02:11:06z",1.200000,"metres","LGH",,1,""
* 7,"WL",1,"SLSR","44198-01-01","2008-02-01T02:16:06z",1.150000,"metres","LGH",,1,""
* 8,"WL",1,"SLSR","44198-01-01","2008-02-01T02:22:06z",1.200000,"metres","LGH",,1,""
*
*/

/**
 * Parser for comma separated BOM-HCS time series files.
 *
 * <p>The file starts with header lines that begin with {@code #}. The only header that is
 * interpreted is {@code Local ObsTime Offset}, which overrides the default time zone of the
 * content handler. Every following line is read as a record of twelve comma separated,
 * optionally double-quoted fields; each record is pushed to the content handler as one value.
 *
 * <p>Instances keep the current reader and handler in fields while parsing, so a single
 * instance must not be used for two parses at the same time.
 */
public final class HcsTimeSeriesParser implements TextParser<TimeSeriesContentHandler> {
    private static final Logger log = LogManager.getLogger();

    // Regular expression for a time zone offset given in whole hours.
    // Matches an optional leading '-' or '+', followed by either two digits starting with 0 (00-09),
    // two digits from 10 to 13, or a single digit 0-9.
    private static final Pattern pattern = Pattern.compile("[\\-\\+]?(0[0-9]|1[0-3]|[0-9])");

    // Column positions, in the order of the "Data Fields" header of the file format:
    // IndexNo, SensorType, SensorDataType, SiteIdType, SiteId, ObservationTimestamp, RealValue, Unit,
    // SensorTypeParam1, SensorTypeParam2, Quality, Comment
    private static final int COLUMN_COUNT = 12;
    private static final int SENSOR_TYPE = 1;
    private static final int SENSOR_DATA_TYPE = 2;
    private static final int SITE_ID = 4;
    private static final int OBSERVATION_TIMESTAMP = 5;
    private static final int REAL_VALUE = 6;
    private static final int UNIT = 7;
    private static final int SENSOR_TYPE_PARAM_1 = 8;
    private static final int SENSOR_TYPE_PARAM_2 = 9;
    private static final int QUALITY = 10;
    private static final int COMMENT = 11;

    private TimeZone defaultTimeZone = null;
    private LineReader reader = null;
    private TimeSeriesContentHandler handler = null;

    /**
     * Reads the header lines and then every record, forwarding each record to the handler.
     *
     * @param reader          source of the lines; empty lines are skipped
     * @param virtualFileName not used by this parser
     * @param handler         receives the header, time, value, flag and comment of every record
     * @throws IOException when reading from {@code reader} fails
     */
    @Override
    public void parse(LineReader reader, String virtualFileName, TimeSeriesContentHandler handler) throws IOException {
        this.reader = reader;
        this.handler = handler;

        // An empty value field is reported to the handler as a missing value.
        handler.addMissingValue("");
        DefaultTimeSeriesHeader header = new DefaultTimeSeriesHeader();
        reader.setSkipEmptyLines(true);

        parseHeader();

        for (String[] buffer = new String[COLUMN_COUNT]; reader.readLine(',', '\"', buffer) != -1; ) {
            header.setParameterId(buffer[SENSOR_TYPE]);
            header.setLocationId(buffer[SITE_ID]);
            String timeText = buffer[OBSERVATION_TIMESTAMP];
            // Records without a timestamp are skipped.
            if (TextUtils.trimToNull(timeText) == null) continue;
            TimeZone timeZone = defaultTimeZone;
            // A trailing 'Z' (any case) marks the timestamp as GMT and overrides the default time zone.
            if (TextUtils.endsWithIgnoreCase(timeText, "Z")) {
                timeZone = TimeZoneUtils.GMT;
                timeText = timeText.substring(0, timeText.length() - 1);
            }
            handler.setTime(timeZone, "yyyy-MM-dd'T'HH:mm:ss", timeText);
            handler.setValue('.', buffer[REAL_VALUE]);
            header.setUnit(buffer[UNIT] + " " + buffer[SENSOR_TYPE_PARAM_1]);
            header.setTimeStep(parseTimeStep(buffer, timeZone));
            handler.setFlag(buffer[QUALITY]);
            handler.setComment(buffer[COMMENT]);
            handler.setTimeSeriesHeader(header);
            handler.applyCurrentFields();
        }
    }

    /**
     * Derives the time step of a record from its SensorTypeParam2 field, interpreted as seconds.
     *
     * @param buffer   the fields of the current record
     * @param timeZone time zone passed on to the created time step
     * @return the equidistant time step, or {@code null} when the field is empty, zero or not a
     *         number, or when SensorDataType equals "1"
     */
    private TimeStep parseTimeStep(String[] buffer, TimeZone timeZone) {
        String timeStepString = TextUtils.trimToNull(buffer[SENSOR_TYPE_PARAM_2]);
        if (timeStepString == null) return null;
        // NOTE(review): SensorDataType "1" never gets a time step; presumably it marks
        // non-equidistant (instantaneous) data - confirm against the format specification.
        if (TextUtils.equals(buffer[SENSOR_DATA_TYPE], "1")) return null;
        try {
            int timeStepSeconds = TextUtils.parseInt(timeStepString);
            if (timeStepSeconds == 0) return null;
            return SimpleEquidistantTimeStep.getInstanceFromTimeZone(timeStepSeconds * TimeUnit.SECOND_MILLIS, timeZone);
        } catch (NumberFormatException e) {
            log.warn("Can not parse time step " + buffer[SENSOR_TYPE_PARAM_2] + " at " + reader.getFileAndLineNumber());
            return null;
        }
    }

    /**
     * Consumes the leading header lines (lines starting with '#') and applies the
     * "Local ObsTime Offset" header, when present, as the default time zone.
     * The first line that is not a header line is pushed back with mark/reset so that
     * {@link #parse} reads it as the first record.
     *
     * @throws IOException when reading, marking or resetting the reader fails
     */
    private void parseHeader() throws IOException {
        defaultTimeZone = handler.getDefaultTimeZone();
        // NOTE(review): the read-ahead limit of 200 has to cover the first non-header line;
        // whether a longer line makes reset() fail depends on LineReader - confirm.
        reader.mark(200);
        for (String line; (line = reader.readLine()) != null; reader.mark(200)) {
            line = line.trim();
            // A whitespace-only line has no first character to inspect; skip it instead of failing.
            if (line.isEmpty()) continue;
            if (line.charAt(0) != '#') {
                // this is not a header row, undo read line
                reader.reset();
                break;
            }

            // Supported formats:
            // # HEADER: Local ObsTime Offset: (+/-)(h)h(:mm)
            String[] elements = TextUtils.split(line, ':');
            if (elements.length < 3) continue;
            TextUtils.trimElements(elements);
            if (!"Local ObsTime Offset".equalsIgnoreCase(elements[1])) continue;

            String timeZone = formatOffset(elements);
            if (timeZone == null) {
                // Minutes part is missing or too short; keep the default time zone.
                log.error("Can not parse time zone from header '" + line + "' at " + reader.getFileAndLineNumber());
                continue;
            }

            try {
                defaultTimeZone = TimeZoneUtils.parseTimeZone("GMT" + timeZone);
            } catch (ParseException e) {
                log.error("Can not parse time zone " + timeZone + " at " + reader.getFileAndLineNumber(), e);
            }
        }
    }

    /**
     * Normalises the offset of a "Local ObsTime Offset" header to the form {@code (+/-)hh:mm}.
     *
     * @param elements the header line split on ':' and trimmed; element 2 holds the hours and
     *                 element 3, when present, the minutes
     * @return the normalised offset text, or {@code null} when a minutes element is present
     *         but has fewer than two characters
     */
    private static String formatOffset(String[] elements) {
        StringBuilder timeZone = new StringBuilder();
        if (elements.length >= 4) {
            // Four or more elements: the offset has minutes defined, (+/-)(h)h:mm
            String minutes = elements[3];
            if (minutes == null || minutes.length() < 2) return null;
            timeZone.append(elements[2]).append(':').append(minutes.substring(0, 2));
        } else {
            // Only hours are given. The matcher extracts the part that has the expected
            // format, after which the minutes ":00" are appended.
            Matcher matcher = pattern.matcher(elements[2]);
            if (matcher.find()) {
                timeZone.append(matcher.group());
            }
            timeZone.append(":00");
        }
        // If a '+' or '-' is missing, a '+' is prepended
        if (timeZone.charAt(0) != '+' && timeZone.charAt(0) != '-') timeZone.insert(0, "+");
        // With the sign in place, a length of 5 or less means the hours have fewer than
        // two digits; insert a 0 directly after the sign.
        if (timeZone.length() <= 5) timeZone.insert(1, "0");
        return timeZone.toString();
    }
}


// No labels