# mirror of https://github.com/sstent/Garmin_Analyser.git
# synced 2026-01-25 08:35:12 +00:00
"""File parser for various workout formats (FIT, TCX, GPX)."""
|
|
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import Dict, Any, Optional, List
|
|
import pandas as pd
|
|
import numpy as np
|
|
|
|
try:
|
|
from fitparse import FitFile
|
|
except ImportError:
|
|
raise ImportError("fitparse package required. Install with: pip install fitparse")
|
|
|
|
from models.workout import WorkoutData, WorkoutMetadata, PowerData, HeartRateData, SpeedData, ElevationData, GearData
|
|
from config.settings import SUPPORTED_FORMATS, BikeConfig, INDOOR_KEYWORDS
|
|
from utils.gear_estimation import estimate_gear_series, compute_gear_summary
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class FileParser:
    """Parser for workout files in various formats (FIT, TCX, GPX)."""

    def __init__(self):
        """Initialize the parser; no per-instance state is required."""
        pass

def parse_file(self, file_path: Path) -> Optional[WorkoutData]:
|
|
"""Parse a workout file and return structured data.
|
|
|
|
Args:
|
|
file_path: Path to the workout file
|
|
|
|
Returns:
|
|
WorkoutData object or None if parsing failed
|
|
"""
|
|
if not file_path.exists():
|
|
logger.error(f"File not found: {file_path}")
|
|
return None
|
|
|
|
file_extension = file_path.suffix.lower()
|
|
|
|
if file_extension not in SUPPORTED_FORMATS:
|
|
logger.error(f"Unsupported file format: {file_extension}")
|
|
return None
|
|
|
|
try:
|
|
if file_extension == '.fit':
|
|
return self._parse_fit(file_path)
|
|
elif file_extension == '.tcx':
|
|
return self._parse_tcx(file_path)
|
|
elif file_extension == '.gpx':
|
|
return self._parse_gpx(file_path)
|
|
else:
|
|
logger.error(f"Parser not implemented for format: {file_extension}")
|
|
return None
|
|
|
|
except Exception as e:
|
|
logger.error(f"Failed to parse file {file_path}: {e}")
|
|
return None
|
|
|
|
    def _parse_fit(self, file_path: Path) -> Optional[WorkoutData]:
        """Parse FIT file format.

        Args:
            file_path: Path to FIT file

        Returns:
            WorkoutData object or None if parsing failed
        """
        try:
            fit_file = FitFile(str(file_path))

            # Extract session data (activity-level summary fields)
            session_data = self._extract_fit_session(fit_file)
            if not session_data:
                logger.error("No session data found in FIT file")
                return None

            # Extract record data (timestamp-based data)
            records = list(fit_file.get_messages('record'))
            if not records:
                logger.error("No record data found in FIT file")
                return None

            # Create DataFrame from records
            df = self._fit_records_to_dataframe(records)
            if df.empty:
                logger.error("No valid data extracted from FIT records")
                return None

            # Create metadata; missing session fields fall back to defaults
            # ('unknown' id, 'Workout' name, current time, zero duration,
            # None for optional summaries).
            metadata = WorkoutMetadata(
                activity_id=str(session_data.get('activity_id', 'unknown')),
                activity_name=session_data.get('activity_name', 'Workout'),
                start_time=session_data.get('start_time', pd.Timestamp.now()),
                duration_seconds=session_data.get('total_timer_time', 0),
                distance_meters=session_data.get('total_distance'),
                avg_heart_rate=session_data.get('avg_heart_rate'),
                max_heart_rate=session_data.get('max_heart_rate'),
                avg_power=session_data.get('avg_power'),
                max_power=session_data.get('max_power'),
                avg_speed=session_data.get('avg_speed'),
                max_speed=session_data.get('max_speed'),
                elevation_gain=session_data.get('total_ascent'),
                elevation_loss=session_data.get('total_descent'),
                calories=session_data.get('total_calories'),
                sport=session_data.get('sport', 'cycling'),
                sub_sport=session_data.get('sub_sport'),
                is_indoor=session_data.get('is_indoor', False)
            )

            # Fall back to a keyword scan of the activity name when the
            # session itself did not flag the workout as indoor.
            if not metadata.is_indoor and metadata.activity_name:
                metadata.is_indoor = any(keyword in metadata.activity_name.lower() for keyword in INDOOR_KEYWORDS)

            # Create workout data
            workout_data = WorkoutData(
                metadata=metadata,
                raw_data=df
            )

            # Add processed data if available; each extractor returns None
            # when its required columns are absent from the record frame.
            if not df.empty:
                workout_data.power = self._extract_power_data(df)
                workout_data.heart_rate = self._extract_heart_rate_data(df)
                workout_data.speed = self._extract_speed_data(df)
                workout_data.elevation = self._extract_elevation_data(df)
                workout_data.gear = self._extract_gear_data(df)

            return workout_data

        except Exception as e:
            logger.error(f"Failed to parse FIT file {file_path}: {e}")
            return None
def _extract_fit_session(self, fit_file) -> Optional[Dict[str, Any]]:
|
|
"""Extract session data from FIT file.
|
|
|
|
Args:
|
|
fit_file: FIT file object
|
|
|
|
Returns:
|
|
Dictionary with session data
|
|
"""
|
|
try:
|
|
sessions = list(fit_file.get_messages('session'))
|
|
if not sessions:
|
|
return None
|
|
|
|
session = sessions[0]
|
|
data = {}
|
|
|
|
for field in session:
|
|
if field.name and field.value is not None:
|
|
data[field.name] = field.value
|
|
|
|
return data
|
|
|
|
except Exception as e:
|
|
logger.error(f"Failed to extract session data: {e}")
|
|
return None
|
|
|
|
def _fit_records_to_dataframe(self, records) -> pd.DataFrame:
|
|
"""Convert FIT records to pandas DataFrame.
|
|
|
|
Args:
|
|
records: List of FIT record messages
|
|
|
|
Returns:
|
|
DataFrame with workout data
|
|
"""
|
|
data = []
|
|
|
|
for record in records:
|
|
record_data = {}
|
|
for field in record:
|
|
if field.name and field.value is not None:
|
|
record_data[field.name] = field.value
|
|
data.append(record_data)
|
|
|
|
if not data:
|
|
return pd.DataFrame()
|
|
|
|
df = pd.DataFrame(data)
|
|
|
|
# Convert timestamp to datetime
|
|
if 'timestamp' in df.columns:
|
|
df['timestamp'] = pd.to_datetime(df['timestamp'])
|
|
df = df.sort_values('timestamp')
|
|
df = df.reset_index(drop=True)
|
|
|
|
return df
|
|
|
|
def _extract_power_data(self, df: pd.DataFrame) -> Optional[PowerData]:
|
|
"""Extract power data from DataFrame.
|
|
|
|
Args:
|
|
df: DataFrame with workout data
|
|
|
|
Returns:
|
|
PowerData object or None
|
|
"""
|
|
if 'power' not in df.columns:
|
|
return None
|
|
|
|
power_values = df['power'].dropna().tolist()
|
|
if not power_values:
|
|
return None
|
|
|
|
return PowerData(
|
|
power_values=power_values,
|
|
estimated_power=[], # Will be calculated later
|
|
power_zones={}
|
|
)
|
|
|
|
def _extract_heart_rate_data(self, df: pd.DataFrame) -> Optional[HeartRateData]:
|
|
"""Extract heart rate data from DataFrame.
|
|
|
|
Args:
|
|
df: DataFrame with workout data
|
|
|
|
Returns:
|
|
HeartRateData object or None
|
|
"""
|
|
if 'heart_rate' not in df.columns:
|
|
return None
|
|
|
|
hr_values = df['heart_rate'].dropna().tolist()
|
|
if not hr_values:
|
|
return None
|
|
|
|
return HeartRateData(
|
|
heart_rate_values=hr_values,
|
|
hr_zones={},
|
|
avg_hr=np.mean(hr_values),
|
|
max_hr=np.max(hr_values)
|
|
)
|
|
|
|
def _extract_speed_data(self, df: pd.DataFrame) -> Optional[SpeedData]:
|
|
"""Extract speed data from DataFrame.
|
|
|
|
Args:
|
|
df: DataFrame with workout data
|
|
|
|
Returns:
|
|
SpeedData object or None
|
|
"""
|
|
if 'speed' not in df.columns:
|
|
return None
|
|
|
|
speed_values = df['speed'].dropna().tolist()
|
|
if not speed_values:
|
|
return None
|
|
|
|
# Convert m/s to km/h if needed
|
|
if max(speed_values) < 50: # Likely m/s
|
|
speed_values = [s * 3.6 for s in speed_values]
|
|
|
|
# Calculate distance if available
|
|
distance_values = []
|
|
if 'distance' in df.columns:
|
|
distance_values = df['distance'].dropna().tolist()
|
|
# Convert to km if in meters
|
|
if distance_values and max(distance_values) > 1000:
|
|
distance_values = [d / 1000 for d in distance_values]
|
|
|
|
return SpeedData(
|
|
speed_values=speed_values,
|
|
distance_values=distance_values,
|
|
avg_speed=np.mean(speed_values),
|
|
max_speed=np.max(speed_values),
|
|
total_distance=distance_values[-1] if distance_values else None
|
|
)
|
|
|
|
def _extract_elevation_data(self, df: pd.DataFrame) -> Optional[ElevationData]:
|
|
"""Extract elevation data from DataFrame.
|
|
|
|
Args:
|
|
df: DataFrame with workout data
|
|
|
|
Returns:
|
|
ElevationData object or None
|
|
"""
|
|
if 'altitude' not in df.columns and 'elevation' not in df.columns:
|
|
return None
|
|
|
|
# Use 'altitude' or 'elevation' column
|
|
elevation_col = 'altitude' if 'altitude' in df.columns else 'elevation'
|
|
elevation_values = df[elevation_col].dropna().tolist()
|
|
|
|
if not elevation_values:
|
|
return None
|
|
|
|
# Calculate gradients
|
|
gradient_values = self._calculate_gradients(df)
|
|
|
|
# Add gradient column to DataFrame
|
|
df['gradient_percent'] = gradient_values
|
|
|
|
return ElevationData(
|
|
elevation_values=elevation_values,
|
|
gradient_values=gradient_values,
|
|
elevation_gain=max(elevation_values) - min(elevation_values),
|
|
elevation_loss=0, # Will be calculated more accurately
|
|
max_gradient=np.max(gradient_values),
|
|
min_gradient=np.min(gradient_values)
|
|
)
|
|
|
|
def _extract_gear_data(self, df: pd.DataFrame) -> Optional[GearData]:
|
|
"""Extract gear data from DataFrame.
|
|
|
|
Args:
|
|
df: DataFrame with workout data
|
|
|
|
Returns:
|
|
GearData object or None
|
|
"""
|
|
if 'cadence_rpm' not in df.columns or 'speed_mps' not in df.columns:
|
|
logger.info("Gear estimation skipped: missing speed_mps or cadence_rpm columns")
|
|
return None
|
|
|
|
# Estimate gear series
|
|
gear_series = estimate_gear_series(
|
|
df,
|
|
wheel_circumference_m=BikeConfig.TIRE_CIRCUMFERENCE_M,
|
|
valid_configurations=BikeConfig.VALID_CONFIGURATIONS
|
|
)
|
|
|
|
if gear_series.empty:
|
|
logger.info("Gear estimation skipped: no valid data for estimation")
|
|
return None
|
|
|
|
# Compute summary
|
|
summary = compute_gear_summary(gear_series)
|
|
|
|
return GearData(
|
|
series=gear_series,
|
|
summary=summary
|
|
)
|
|
|
|
def _distance_window_indices(self, distance: np.ndarray, half_window_m: float) -> tuple[np.ndarray, np.ndarray]:
|
|
"""Compute backward and forward indices for distance-based windowing.
|
|
|
|
For each sample i, find the closest indices j <= i and k >= i such that
|
|
distance[i] - distance[j] >= half_window_m and distance[k] - distance[i] >= half_window_m.
|
|
|
|
Args:
|
|
distance: Monotonic array of cumulative distances in meters
|
|
half_window_m: Half window size in meters
|
|
|
|
Returns:
|
|
Tuple of (j_indices, k_indices) arrays
|
|
"""
|
|
n = len(distance)
|
|
j_indices = np.full(n, -1, dtype=int)
|
|
k_indices = np.full(n, -1, dtype=int)
|
|
|
|
for i in range(n):
|
|
# Find largest j <= i where distance[i] - distance[j] >= half_window_m
|
|
j = i
|
|
while j >= 0 and distance[i] - distance[j] < half_window_m:
|
|
j -= 1
|
|
j_indices[i] = max(j, 0)
|
|
|
|
# Find smallest k >= i where distance[k] - distance[i] >= half_window_m
|
|
k = i
|
|
while k < n and distance[k] - distance[i] < half_window_m:
|
|
k += 1
|
|
k_indices[i] = min(k, n - 1)
|
|
|
|
return j_indices, k_indices
|
|
|
|
    def _calculate_gradients(self, df: pd.DataFrame) -> List[float]:
        """Calculate smoothed, distance-referenced gradients from elevation data.

        Computes gradients using a distance-based smoothing window, handling missing
        distance/speed/elevation data gracefully. Assumes 1 Hz sampling for distance
        derivation if speed is available but distance is not.

        Args:
            df: DataFrame containing elevation, distance, and speed columns

        Returns:
            List of gradient values in percent, with NaN for invalid computations
        """
        # NOTE(review): imported at function scope rather than module top —
        # presumably to avoid an import cycle; confirm before moving it.
        from config.settings import SMOOTHING_WINDOW

        n = len(df)
        if n < 2:
            # Fewer than two samples: no slope is defined.
            return [np.nan] * n

        # Derive distance array
        if 'distance' in df.columns:
            distance = df['distance'].values.astype(float)
            # NaN comparisons are False, so frames with missing distance
            # samples also fail this check and fall back to speed.
            if not np.all(distance[1:] >= distance[:-1]):
                logger.warning("Distance not monotonic, deriving from speed")
                distance = None  # Fall through to speed derivation
        else:
            distance = None

        if distance is None:
            if 'speed' in df.columns:
                speed = df['speed'].values.astype(float)
                # Cumulative speed approximates distance under the 1 Hz
                # sampling assumption; assumes speed is in m/s — TODO confirm.
                distance = np.cumsum(speed)  # dt=1 assumed
            else:
                logger.warning("No distance or speed available, cannot compute gradients")
                return [np.nan] * n

        # Get elevation; the caller (_extract_elevation_data) guarantees one
        # of these columns exists.
        elevation_col = 'altitude' if 'altitude' in df.columns else 'elevation'
        elevation = df[elevation_col].values.astype(float)

        half_window = SMOOTHING_WINDOW / 2
        j_arr, k_arr = self._distance_window_indices(distance, half_window)

        gradients = []
        for i in range(n):
            j, k = j_arr[i], k_arr[i]
            # Require at least 1 m of horizontal travel (avoids division by
            # near-zero) and valid elevation at both window endpoints.
            if distance[k] - distance[j] >= 1 and not (pd.isna(elevation[j]) or pd.isna(elevation[k])):
                delta_elev = elevation[k] - elevation[j]
                delta_dist = distance[k] - distance[j]
                grad = 100 * delta_elev / delta_dist
                # Clamp to a plausible +/-30% to suppress barometric spikes.
                grad = np.clip(grad, -30, 30)
                gradients.append(grad)
            else:
                gradients.append(np.nan)

        # Light smoothing: rolling median over 5 samples, interpolate isolated NaNs
        grad_series = pd.Series(gradients)
        smoothed = grad_series.rolling(5, center=True, min_periods=1).median()
        smoothed = smoothed.interpolate(limit=3, limit_direction='both')

        return smoothed.tolist()
    def _parse_tcx(self, file_path: Path) -> Optional[WorkoutData]:
        """Parse TCX file format.

        Args:
            file_path: Path to TCX file

        Returns:
            WorkoutData object or None if parsing failed

        Raises:
            NotImplementedError: always, until TCX support is written.
                parse_file() catches this and converts it to a logged
                error plus a None return.
        """
        # Stub: TCX parsing is planned but not yet implemented.
        raise NotImplementedError("TCX file parsing is not yet implemented.")
    def _parse_gpx(self, file_path: Path) -> Optional[WorkoutData]:
        """Parse GPX file format.

        Args:
            file_path: Path to GPX file

        Returns:
            WorkoutData object or None if parsing failed

        Raises:
            NotImplementedError: always, until GPX support is written.
                parse_file() catches this and converts it to a logged
                error plus a None return.
        """
        # Stub: GPX parsing is planned but not yet implemented.
        raise NotImplementedError("GPX file parsing is not yet implemented.")