...
package nl.wldelft.timeseriesparsers;
...
import nl.wldelft.util.
...
TextUtils;
...
import nl.wldelft.util.TimeUnit;
...
import nl.wldelft.util.TimeZoneUtils;
...
import nl.wldelft.util.io.LineReader;
...
import nl.wldelft.util.io.TextParser;
...
import nl.wldelft.util.timeseries.DefaultTimeSeriesHeader;
...
import nl.wldelft.util.timeseries.SimpleEquidistantTimeStep;
...
import nl.wldelft.util.timeseries.TimeSeriesContentHandler;
...
import nl.wldelft.util.timeseries.TimeStep;
...
import org.apache.logging.log4j.Logger;
...
import org.apache.logging.log4j.LogManager;
import java.io.IOException;
...
import java.text.ParseException;
...
import java.util.TimeZone
...
;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
...
* Imports time series data in a tabular ASCII format from the Australian Bureau of
...
* Meteorology (HCS). The files consist of a set of header lines and then lines
...
* with a fixed number of fields. The fields are separated by a comma and the order is fixed.
...
* Multiple locations and parameters can be put in a single file.
...
*
...
* Example:
...
* # HEADER: Agency Id: BoM
...
* # HEADER: File Generation Date: 2008-08-01T14:30:45z
...
* # HEADER: File Format: BOM-HCS
...
* # HEADER: File Format Version: 2.0
...
* # HEADER: Generated by (system): TimeStudio
...
* # HEADER: Number of Records: 8
...
* # HEADER: Local ObsTime Offset: 0
...
* # HEADER: Data Fields: IndexNo, SensorType, SensorDataType, SiteIdType, SiteId, ObservationTimestamp, RealValue, Unit, SensorTypeParam1, SensorTypeParam2, Quality, Comment
...
* 1,"WL",1,"SLSR","44198-01-01","2008-02-01T01:07:06z",1.150000,"metres","LGH",,1,""
...
* 2,"WL",1,"SLSR","44198-01-01","2008-02-01T01:08:06z",1.200000,"metres","LGH",,1,""
...
* 3,"WL",1,"SLSR","44198-01-01","2008-02-01T01:43:06z",1.150000,"metres","LGH",,1,""
...
* 4,"WL",1,"SLSR","44198-01-01","2008-02-01T01:46:06z",1.200000,"metres","LGH",,1,""
...
* 5,"WL",1,"SLSR","44198-01-01","2008-02-01T02:04:06z",1.150000,"metres","LGH",,1,""
...
* 6,"WL",1,"SLSR","44198-01-01","2008-02-01T02:11:06z",1.200000,"metres","LGH",,1,""
...
* 7,"WL",1,"SLSR","44198-01-01","2008-02-01T02:16:06z",1.150000,"metres","LGH",,1,""
...
* 8,"WL",1,"SLSR","44198-01-01","2008-02-01T02:22:06z",1.200000,"metres","LGH",,1,""
...
*
...
*/
...
public final class HcsTimeSeriesParser implements TextParser<TimeSeriesContentHandler> {
...
private static final Logger log =
...
LogManager.getLogger(
...
);
...
private TimeZone defaultTimeZone = null;
...
private LineReader reader = null;
...
private TimeSeriesContentHandler handler = null;
...
//Regular expression for timezone in hours
//Matches: can start with - or + or without, the numbers can be 2 digits starting with 0 or 2 digists starting with 1 followed by 0-3, or 1 digit that is 0-9
private static final Pattern pattern = Pattern.compile("[\\-\\+]?(0[0-9]|1[0-3]|[0-9])");
@Override
...
public void parse(LineReader reader, String virtualFileName, TimeSeriesContentHandler handler) throws IOException {
...
this.reader = reader;
...
this.handler = handler;
...
handler.addMissingValue("");
...
DefaultTimeSeriesHeader header = new DefaultTimeSeriesHeader();
...
reader.setSkipEmptyLines(true);
...
parseHeader();
...
for (String[] buffer = new String[12]; reader.readLine(',', '\"', buffer) != -1; ) {
...
header.setParameterId(buffer[1]);
...
header.setLocationId(buffer[4]);
...
String timeText = buffer[5];
...
if (TextUtils.trimToNull(timeText) == null) continue;
...
TimeZone timeZone = defaultTimeZone;
...
if (TextUtils.endsWithIgnoreCase(timeText, "Z")) {
...
timeZone =
...
TimeZoneUtils.GMT;
...
timeText = timeText.substring(0, timeText.length() - 1);
...
}
...
handler.setTime(timeZone, "yyyy-MM-dd'T'HH:mm:ss", timeText);
...
handler.setValue('.', buffer[6]);
...
header.setUnit(buffer[7] + " " + buffer[8]);
...
header.setTimeStep(parseTimeStep(buffer, timeZone));
...
handler.setFlag(buffer[10]);
...
handler.setComment(buffer[11]);
...
handler.setTimeSeriesHeader(header);
...
handler.applyCurrentFields();
...
}
...
}
...
private TimeStep parseTimeStep(String[] buffer, TimeZone timeZone) {
...
String timeStepString
...
=
...
TextUtils.trimToNull(buffer[9]);
if (timeStepString == null) return null;
...
if (TextUtils.equals(buffer[2], "1")) return null;
...
try {
...
int timeStepSeconds = TextUtils.parseInt(timeStepString);
if (timeStepSeconds == 0) return null;
return SimpleEquidistantTimeStep.getInstanceFromTimeZone(
...
timeStepSeconds * TimeUnit.SECOND_MILLIS, timeZone);
...
} catch (NumberFormatException e) {
...
log.
...
warn("Can not parse time step " + buffer[9] + " at " + reader.getFileAndLineNumber());
...
return null;
...
}
...
}
...
private void parseHeader() throws IOException {
...
defaultTimeZone = handler.getDefaultTimeZone();
...
reader.mark(200);
...
for (String line; (line = this.reader.readLine()) != null; reader.mark(200)) {
...
line = line.trim();
...
if (line.charAt(0) != '#') {
...
// this is not a header row, undo read line
...
reader.reset();
...
break;
...
}
...
// Supported
...
formats:
//
...
# HEADER: Local ObsTime Offset: (+/-)(h)h(:mm)
...
String[] elements = TextUtils.split(line, ':');
if (elements.length < 3) continue;
...
...
...
...
...
...
...
TextUtils.trimElements(elements);
...
...
...
if (!elements[1].equalsIgnoreCase("Local ObsTime Offset
...
")) continue;
StringBuilder timeZone = new StringBuilder("");
//
...
If length of elements is 4 or higher the timezone has minutes defined: (+/-)(h)h:mm
...
if (elements.length >=
...
4) {
timeZone.append(elements[2] + ':' + elements[3].substring(0, 2)
...
);
} else {
// If length
...
of
...
elements is less than 4, only hours are given and minutes ":00" will be appended
//A matcher is used to match the pattern to make sure only the correct format is taken
Matcher matcher =
...
pattern.
...
matcher(elements[2]);
...
if (
...
matcher.find()) {
timeZone.append(matcher.group());
...
...
...
}
...
...
...
...
...
...
...
...
...
...
...
...
...
...
timeZone.append(":00"
...
);
}
...
...
//If a '+' or '-' is missing, a '+' will be prepended
...
...
if(timeZone.charAt(0) !=
...
'+'
...
&& timeZone.charAt(0)
...
!= '-')
...
timeZone
...
.insert(0, "+");
//If the length is lower than 5 an extra 0 is inserted so there are 2 digits for the hours
if (timeZone.length() <= 5) timeZone.insert(1, "0");
try {
...
...
...
defaultTimeZone = TimeZoneUtils.parseTimeZone("GMT" + timeZone);
...
...
} catch (ParseException e) {
...
log.error("Can not parse time zone " + timeZone + " at " + reader.getFileAndLineNumber());
...
}
...
}
...
}
...
}
...