Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
86ce925
NPI-4067 Add column misalignment checks based on SP3d spec
treefern Jul 1, 2025
5ce5c8f
NPI-4067 align logging with strict_mode logic
treefern Jul 1, 2025
6e89572
incremental refactor, so far still working
treefern Jul 1, 2025
3f48c51
NPI-4067 more small improvements
treefern Jul 1, 2025
03f49d9
NPI-4067 significant overhaul of strict mode structure, unifying stri…
treefern Jul 1, 2025
82e2637
NPI-4067 various test cleanup. One failure needs merge of earlier work
treefern Jul 1, 2025
19ca741
Merge branch 'NPI-4066-reject-sp3-on-overlong-lines' into NPI-4067-ch…
treefern Jul 1, 2025
0feebf4
NPI-4067 quick fixes for Pandas performance warnings... not sure how …
treefern Jul 1, 2025
6d1ead7
NPI-4067 quick Pandas deprecation fix. Existing test should cover it
treefern Jul 1, 2025
e41eb7c
NPI-4067 extensive restructuring of sp3 tests and check strictness op…
treefern Jul 1, 2025
30aa0cb
Merge branch 'main' into NPI-4067-check-sp3-column-alignment
treefern Jul 1, 2025
d1dca62
NPI-4067 revert logic to dedupe epochs even with strictness set to OF…
treefern Jul 11, 2025
579b157
NPI-4067 do not redundantly say warning in a warning log
treefern Jul 11, 2025
1287253
NPI-4067 use Optional rather than pipe, to maintain Python 3.9 compat…
treefern Jul 11, 2025
30ccf10
Merge branch 'main' into NPI-4067-check-sp3-column-alignment
treefern Sep 29, 2025
33d92de
NPI-4067 fix crashes in filename vs content discrepancy check by only…
treefern Sep 29, 2025
e7d6964
NPI-4067 tidy up SP3 overlong line warning to only print the first 5 …
treefern Sep 29, 2025
1f7d3a8
NPI-4067 update long term product test to explicitly enable LTP suppo…
treefern Sep 29, 2025
505c4a9
NPI-4067 update test for overlong SP3 lines, to match improved logic …
treefern Sep 29, 2025
996b011
NPI-4067 better docstring on determine_properties_from_filename()
treefern Sep 29, 2025
923c671
NPI-4067 updated docstring on filename vs content discrepancy check, …
treefern Sep 29, 2025
9435021
NPI-4067 add placeholder for new test
treefern Sep 29, 2025
a72a39c
Merge branch 'main' into NPI-4067-check-sp3-column-alignment
treefern Oct 1, 2025
0fbfae5
NPI-4067 clean up docstring based on PR feedback
treefern Oct 9, 2025
8b8d951
NPI-4067 tidy up comments based on PR feedback
treefern Oct 9, 2025
b750f72
NPI-4067 fix deprecated type hints in new code. Fixes for existing co…
treefern Oct 9, 2025
5027d96
Merge branch 'main' into NPI-4067-check-sp3-column-alignment
treefern Oct 9, 2025
5846860
NPI-4067 clean up integration of recent PR ensuring tests for both c…
treefern Oct 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 39 additions & 19 deletions gnssanalysis/filenames.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,7 @@ def determine_sp3_name_props(
name_props = {}
# First, properties from the SP3 data:
try:
sp3_df = gn_io.sp3.read_sp3(file_path, nodata_to_nan=False, format_check_strictness=strict_mode)
sp3_df = gn_io.sp3.read_sp3(file_path, nodata_to_nan=False, strict_mode=strict_mode)
except Exception as e:
# TODO: Work out what exceptions read_sp3 can actually throw when given a non-SP3 file
if strict_mode == StrictModes.STRICT_RAISE:
Expand Down Expand Up @@ -769,26 +769,27 @@ def determine_sp3_name_props(
def determine_properties_from_filename(
filename: str,
expect_long_filenames: bool = False,
reject_long_term_products: bool = False,
reject_long_term_products: bool = True,
strict_mode: type[StrictMode] = StrictModes.STRICT_WARN,
include_compressed_flag: bool = False,
) -> Union[Dict[str, Any], None]:
) -> dict[str, Any]:
"""Determine IGS filename properties based purely on a filename

This function does its best to support both IGS long filenames and old short filenames.
Similar to other name property detection functions, it returns a dictionary containing
the name properties it manages to successfully determine.

:param str filename: filename to examine for naming properties
:param bool expect_long_filenames: expect provided filenames to conform to IGS long product filename
convention (v2.1), and raise / error if they do not.
:param bool reject_long_term_products: raise exception if an IGS Long Term Product is encountered (these have
no timerange / period, and include an end_epoch).
:param type[StrictMode] strict_mode: indicates whether to raise or warn, if filename is clearly not valid / a
format we support.
:param bool include_compressed_flag: include a flag in output, indicating if the filename indicated
compression (.gz)
:return Dict[str, Any]: dictionary containing the extracted name properties
:param bool expect_long_filenames: (off by default for backwards compatibility) expect provided filenames to
conform to IGS long product filename convention (v2.1), and raise / error if they do not.
:param bool reject_long_term_products: (on by default for backwards compatibility) raise warning or exception if
an IGS Long Term Product is encountered (these have no timerange / period, and include an end_epoch).
:param type[StrictMode] strict_mode: indicates whether to raise or warn (default), if filename is clearly
not valid / a format we support.
:param bool include_compressed_flag: (off by default for backwards compatibility) include a flag in output,
indicating if the filename indicated compression (.gz).
:return dict[str, Any]: dictionary containing the extracted name properties. Will be empty on errors, when
strict_mode is set to WARN (default).
:raises ValueError: if filename seems invalid / unsupported, e.g. if it is too long to be a short filename, but
doesn't match long filename regex
"""
Expand All @@ -798,7 +799,7 @@ def determine_properties_from_filename(
raise ValueError(f"Filename too long (over 51 chars): '{filename}'")
if strict_mode == StrictModes.STRICT_WARN:
warnings.warn(f"Filename too long (over 51 chars): '{filename}'")
return None
return {}

# Filename isn't too long...
# If we're expecting a long format filename, is it too short?
Expand All @@ -807,7 +808,7 @@ def determine_properties_from_filename(
raise ValueError(f"IGS long filename can't be <38 chars: '{filename}'. expect_long_filenames is on")
if strict_mode == StrictModes.STRICT_WARN:
warnings.warn(f"IGS long filename can't be <38 chars: '{filename}'. expect_long_filenames is on")
return None
return {}

match_long = _RE_IGS_LONG_FILENAME.fullmatch(filename)
if match_long is not None:
Expand Down Expand Up @@ -853,7 +854,7 @@ def determine_properties_from_filename(
raise ValueError(f"Long Term Product encountered: '{filename}' and reject_long_term_products is on")
if strict_mode == StrictModes.STRICT_WARN:
warnings.warn(f"Long Term Product encountered: '{filename}' and reject_long_term_products is on")
return None
return {}

start_epoch = datetime.datetime( # Lacks hour and minute precision in LTP version
year=int(match_long["year"]),
Expand Down Expand Up @@ -885,7 +886,7 @@ def determine_properties_from_filename(
raise ValueError(f"Expecting an IGS format long product name, but regex didn't match: '{filename}'")
if strict_mode == StrictModes.STRICT_WARN:
warnings.warn(f"Expecting an IGS format long product name, but regex didn't match: '{filename}'")
return None
return {}

# Is it plausibly a short filename?
if len(filename) >= 38:
Expand All @@ -894,7 +895,7 @@ def determine_properties_from_filename(
raise ValueError(f"Long filename parse failed, but >=38 chars is too long for 'short': '{filename}'")
if strict_mode == StrictModes.STRICT_WARN:
warnings.warn(f"Long filename parse failed, but >=38 chars is too long for 'short': '{filename}'")
return None
return {}

# Try to simplistically parse as short filename as last resort.

Expand Down Expand Up @@ -946,7 +947,9 @@ def determine_properties_from_filename(


def check_filename_and_contents_consistency(
input_file: pathlib.Path, ignore_single_epoch_short: bool = True
input_file: pathlib.Path,
ignore_single_epoch_short: bool = True,
output_orphan_prop_names: bool = False,
) -> Mapping[str, tuple[str, str]]:
"""
Checks that the content of the provided file matches what its filename says should be in it.
Expand All @@ -961,6 +964,10 @@ def check_filename_and_contents_consistency(
File properties which do not match are returned as a mapping of str -> tuple(str, str), taking the form
property_name > filename_derived_value, file_contents_derived_value
:param Path input_file: Path to the file to be checked.
:param bool ignore_single_epoch_short: (on by default) consider it ok for file content to be one epoch short of
what the filename says.
:param bool output_orphan_prop_names: (off by default) for properties found exclusively in file content or name
(not in both, and therefore not compared), return these as 'prop_name': None.
:return Mapping[str, tuple[str,str]]: Empty map if properties agree, else map of discrepancies in the form
property_name > filename_derived_value, file_contents_derived_value. None on failure.
:raises NotImplementedError: if called with a file type not yet supported.
Expand All @@ -987,7 +994,20 @@ def check_filename_and_contents_consistency(
)

discrepancies = {}
for key in file_name_properties.keys():
# Check for keys only present on one side
orphan_keys = set(file_name_properties.keys()).symmetric_difference((set(file_content_properties.keys())))
logging.warning(
"The following properties can't be compared, as they were extracted only from file content or "
f"name (not both): {str(orphan_keys)}"
)
if output_orphan_prop_names:
# Output properties found only in content OR filename.
for orphan_key in orphan_keys:
discrepancies[orphan_key] = None

mutual_keys = set(file_name_properties.keys()).difference(orphan_keys)
# For keys present in both dicts, compare values.
for key in mutual_keys:
if (file_name_val := file_name_properties[key]) != (file_content_val := file_content_properties[key]):
# If enabled, and epoch interval successfully extracted, ignore cases where the timespan of epochs in the
# file content, is one epoch shorter than the timespan the filename implies (e.g. 23:55 vs 1D i.e. 24:00).
Expand Down
2 changes: 1 addition & 1 deletion gnssanalysis/gn_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ def j2000_to_igs_dt(j2000_secs: _np.ndarray) -> _np.ndarray:
time_h = _pd.Series((hour - day).astype("int64").astype(str)).str.rjust(3).values
time_m = _pd.Series((minute - hour).astype("int64").astype(str)).str.rjust(3).values
# Width 12 due to one extra leading space (for easier concatenation next), then _0.00000000 format per SP3d spec:
time_s = (_pd.Series((datetime - minute)).view("int64") / 1e9).apply("{:.8f}".format).str.rjust(12).values
time_s = (_pd.Series((datetime - minute)).astype("int64") / 1e9).apply("{:.8f}".format).str.rjust(12).values
return date_y + date_m + date_d + time_h + time_m + time_s


Expand Down
Loading