Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

package nl.wldelft.timeseriesparsers;

...



import nl.wldelft.util.

...

TextUtils;

...


import nl.wldelft.util.TimeUnit;

...


import nl.wldelft.util.TimeZoneUtils;

...


import nl.wldelft.util.io.LineReader;

...


import nl.wldelft.util.io.TextParser;

...


import nl.wldelft.util.timeseries.DefaultTimeSeriesHeader;

...


import nl.wldelft.util.timeseries.SimpleEquidistantTimeStep;

...


import nl.wldelft.util.timeseries.TimeSeriesContentHandler;

...


import nl.wldelft.util.timeseries.TimeStep;

...


import org.apache.logging.log4j.Logger;

...


import org.apache.logging.log4j.LogManager;

import java.io.IOException;

...


import java.text.ParseException;

...


import java.util.TimeZone

...

;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Imports time series data in a tabular ASCII format from the Australian Bureau of
 * Meteorology (HCS). The files consist of a set of header lines and then lines
 * with a fixed number of fields. The fields are separated by a comma and the order is fixed.
 * Multiple locations and parameters can be put in a single file.
 *
 * Example:
 * # HEADER: Agency Id: BoM
 * # HEADER: File Generation Date: 2008-08-01T14:30:45z
 * # HEADER: File Format: BOM-HCS
 * # HEADER: File Format Version: 2.0
 * # HEADER: Generated by (system): TimeStudio
 * # HEADER: Number of Records: 8
 * # HEADER: Local ObsTime Offset: 0
 * # HEADER: Data Fields: IndexNo, SensorType, SensorDataType, SiteIdType, SiteId, ObservationTimestamp, RealValue, Unit, SensorTypeParam1, SensorTypeParam2, Quality, Comment
 * 1,"WL",1,"SLSR","44198-01-01","2008-02-01T01:07:06z",1.150000,"metres","LGH",,1,""
 * 2,"WL",1,"SLSR","44198-01-01","2008-02-01T01:08:06z",1.200000,"metres","LGH",,1,""
 * 3,"WL",1,"SLSR","44198-01-01","2008-02-01T01:43:06z",1.150000,"metres","LGH",,1,""
 * 4,"WL",1,"SLSR","44198-01-01","2008-02-01T01:46:06z",1.200000,"metres","LGH",,1,""
 * 5,"WL",1,"SLSR","44198-01-01","2008-02-01T02:04:06z",1.150000,"metres","LGH",,1,""
 * 6,"WL",1,"SLSR","44198-01-01","2008-02-01T02:11:06z",1.200000,"metres","LGH",,1,""
 * 7,"WL",1,"SLSR","44198-01-01","2008-02-01T02:16:06z",1.150000,"metres","LGH",,1,""
 * 8,"WL",1,"SLSR","44198-01-01","2008-02-01T02:22:06z",1.200000,"metres","LGH",,1,""
 *
 */

...



public final class HcsTimeSeriesParser implements TextParser<TimeSeriesContentHandler> {

...


private static final Logger log =

...

LogManager.getLogger(

...

);

...


private TimeZone defaultTimeZone = null;

...


private LineReader reader = null;

...


private TimeSeriesContentHandler handler = null;

...


//Regular expression for timezone in hours
//Matches: can start with - or + or without, the numbers can be 2 digits starting with 0 or 2 digists starting with 1 followed by 0-3, or 1 digit that is 0-9
private static final Pattern pattern = Pattern.compile("[\\-\\+]?(0[0-9]|1[0-3]|[0-9])");

@Override

...


public void parse(LineReader reader, String virtualFileName, TimeSeriesContentHandler handler) throws IOException {

...


this.reader = reader;

...


this.handler = handler;

...



handler.addMissingValue("");

...


DefaultTimeSeriesHeader header = new DefaultTimeSeriesHeader();

...


reader.setSkipEmptyLines(true);

...



parseHeader();

...



for (String[] buffer = new String[12]; reader.readLine(',', '\"', buffer) != -1; ) {

...


header.setParameterId(buffer[1]);

...


header.setLocationId(buffer[4]);

...


String timeText = buffer[5];

...


if (TextUtils.trimToNull(timeText) == null) continue;

...


TimeZone timeZone = defaultTimeZone;

...


if (TextUtils.endsWithIgnoreCase(timeText, "Z")) {

...


timeZone =

...

TimeZoneUtils.GMT;

...


timeText = timeText.substring(0, timeText.length() - 1);

...


}

...


handler.setTime(timeZone, "yyyy-MM-dd'T'HH:mm:ss", timeText);

...


handler.setValue('.', buffer[6]);

...


header.setUnit(buffer[7] + " " + buffer[8]);

...


header.setTimeStep(parseTimeStep(buffer, timeZone));

...


handler.setFlag(buffer[10]);

...


handler.setComment(buffer[11]);

...


handler.setTimeSeriesHeader(header);

...


handler.applyCurrentFields();

...


}

...


}

...



private TimeStep parseTimeStep(String[] buffer, TimeZone timeZone) {

...


String timeStepString

...

= 

...

TextUtils.trimToNull(buffer[9]);
if (timeStepString == null) return null;

...


if (TextUtils.equals(buffer[2], "1")) return null;

...


try {

...


int timeStepSeconds = TextUtils.parseInt(timeStepString);
if (timeStepSeconds == 0) return null;
return SimpleEquidistantTimeStep.getInstanceFromTimeZone(

...

timeStepSeconds * TimeUnit.SECOND_MILLIS, timeZone);

...


} catch (NumberFormatException e) {

...


log.

...

warn("Can not parse time step " + buffer[9] + " at  " + reader.getFileAndLineNumber());

...


return null;

...


}

...


}

...



private void parseHeader() throws IOException {

...


defaultTimeZone = handler.getDefaultTimeZone();

...


reader.mark(200);

...


for (String line; (line = this.reader.readLine()) != null; reader.mark(200)) {

...


line = line.trim();

...


if (line.charAt(0) != '#') {

...


// this is not a header row, undo read line

...


reader.reset();

...


break;

...


}

...



// Supported

...

formats:
//

...

# HEADER: Local ObsTime Offset: (+/-)(h)h(:mm)

...

String[] elements = TextUtils.split(line, ':');
if (elements.length < 3) continue;

...

  

...

 

...

 

...

 

...

 

...

 

...

  TextUtils.trimElements(elements);

...

 

...

 

...

if (!elements[1].equalsIgnoreCase("Local ObsTime Offset

...

")) continue;

StringBuilder timeZone = new StringBuilder("");
//

...

If length of elements is 4 or higher the timezone has minutes defined: (+/-)(h)h:mm

...

if (elements.length >=

...

 4) {
timeZone.append(elements[2] + ':' + elements[3].substring(0, 2)

...

);
} else {
// If length

...

of 

...

elements is less than 4, only hours are given and minutes ":00" will be appended
//A matcher is used to match the pattern to make sure only the correct format is taken
Matcher matcher =

...

pattern.

...

matcher(elements[2]);

...


if (

...

matcher.find()) {
timeZone.append(matcher.group());

...

 

...

 

...

}

...

 

...

 

...

 

...

 

...

 

...

 

...

 

...

 

...

 

...

 

...

 

...

 

...

 

...

 timeZone.append(":00"

...

);
}

...

 

...

 //If a '+' or '-' is missing, a '+' will be prepended

...

 

...

 if(timeZone.charAt(0) !=

...

 '+' 

...

&& timeZone.charAt(0) 

...

!= '-') 

...

 timeZone

...

.insert(0, "+");
//If the length is lower than 5 an extra 0 is inserted so there are 2 digits for the hours
if (timeZone.length() <= 5) timeZone.insert(1, "0");

try {

...


...

   

...

defaultTimeZone = TimeZoneUtils.parseTimeZone("GMT" + timeZone);

...


...

} catch (ParseException e) {

...


log.error("Can not parse time zone " + timeZone + " at " + reader.getFileAndLineNumber());

...


}

...


}

...


}

...


}

...