mirror of
https://github.com/sstent/Garmin_Analyser.git
synced 2026-01-25 16:42:40 +00:00
sync
This commit is contained in:
371
parsers/file_parser.py
Normal file
371
parsers/file_parser.py
Normal file
@@ -0,0 +1,371 @@
|
||||
"""File parser for various workout formats (FIT, TCX, GPX)."""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional, List
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
try:
|
||||
from fitparse import FitFile
|
||||
except ImportError:
|
||||
raise ImportError("fitparse package required. Install with: pip install fitparse")
|
||||
|
||||
from ..models.workout import WorkoutData, WorkoutMetadata, PowerData, HeartRateData, SpeedData, ElevationData, GearData
|
||||
from ..config.settings import SUPPORTED_FORMATS
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FileParser:
|
||||
"""Parser for workout files in various formats."""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize file parser."""
|
||||
pass
|
||||
|
||||
def parse_file(self, file_path: Path) -> Optional[WorkoutData]:
|
||||
"""Parse a workout file and return structured data.
|
||||
|
||||
Args:
|
||||
file_path: Path to the workout file
|
||||
|
||||
Returns:
|
||||
WorkoutData object or None if parsing failed
|
||||
"""
|
||||
if not file_path.exists():
|
||||
logger.error(f"File not found: {file_path}")
|
||||
return None
|
||||
|
||||
file_extension = file_path.suffix.lower()
|
||||
|
||||
if file_extension not in SUPPORTED_FORMATS:
|
||||
logger.error(f"Unsupported file format: {file_extension}")
|
||||
return None
|
||||
|
||||
try:
|
||||
if file_extension == '.fit':
|
||||
return self._parse_fit(file_path)
|
||||
elif file_extension == '.tcx':
|
||||
return self._parse_tcx(file_path)
|
||||
elif file_extension == '.gpx':
|
||||
return self._parse_gpx(file_path)
|
||||
else:
|
||||
logger.error(f"Parser not implemented for format: {file_extension}")
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to parse file {file_path}: {e}")
|
||||
return None
|
||||
|
||||
def _parse_fit(self, file_path: Path) -> Optional[WorkoutData]:
|
||||
"""Parse FIT file format.
|
||||
|
||||
Args:
|
||||
file_path: Path to FIT file
|
||||
|
||||
Returns:
|
||||
WorkoutData object or None if parsing failed
|
||||
"""
|
||||
try:
|
||||
fit_file = FitFile(str(file_path))
|
||||
|
||||
# Extract session data
|
||||
session_data = self._extract_fit_session(fit_file)
|
||||
if not session_data:
|
||||
logger.error("No session data found in FIT file")
|
||||
return None
|
||||
|
||||
# Extract record data (timestamp-based data)
|
||||
records = list(fit_file.get_messages('record'))
|
||||
if not records:
|
||||
logger.error("No record data found in FIT file")
|
||||
return None
|
||||
|
||||
# Create DataFrame from records
|
||||
df = self._fit_records_to_dataframe(records)
|
||||
if df.empty:
|
||||
logger.error("No valid data extracted from FIT records")
|
||||
return None
|
||||
|
||||
# Create metadata
|
||||
metadata = WorkoutMetadata(
|
||||
activity_id=str(session_data.get('activity_id', 'unknown')),
|
||||
activity_name=session_data.get('activity_name', 'Workout'),
|
||||
start_time=session_data.get('start_time', pd.Timestamp.now()),
|
||||
duration_seconds=session_data.get('total_timer_time', 0),
|
||||
distance_meters=session_data.get('total_distance'),
|
||||
avg_heart_rate=session_data.get('avg_heart_rate'),
|
||||
max_heart_rate=session_data.get('max_heart_rate'),
|
||||
avg_power=session_data.get('avg_power'),
|
||||
max_power=session_data.get('max_power'),
|
||||
avg_speed=session_data.get('avg_speed'),
|
||||
max_speed=session_data.get('max_speed'),
|
||||
elevation_gain=session_data.get('total_ascent'),
|
||||
elevation_loss=session_data.get('total_descent'),
|
||||
calories=session_data.get('total_calories'),
|
||||
sport=session_data.get('sport', 'cycling'),
|
||||
sub_sport=session_data.get('sub_sport'),
|
||||
is_indoor=session_data.get('is_indoor', False)
|
||||
)
|
||||
|
||||
# Create workout data
|
||||
workout_data = WorkoutData(
|
||||
metadata=metadata,
|
||||
raw_data=df
|
||||
)
|
||||
|
||||
# Add processed data if available
|
||||
if not df.empty:
|
||||
workout_data.power = self._extract_power_data(df)
|
||||
workout_data.heart_rate = self._extract_heart_rate_data(df)
|
||||
workout_data.speed = self._extract_speed_data(df)
|
||||
workout_data.elevation = self._extract_elevation_data(df)
|
||||
workout_data.gear = self._extract_gear_data(df)
|
||||
|
||||
return workout_data
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to parse FIT file {file_path}: {e}")
|
||||
return None
|
||||
|
||||
def _extract_fit_session(self, fit_file) -> Optional[Dict[str, Any]]:
|
||||
"""Extract session data from FIT file.
|
||||
|
||||
Args:
|
||||
fit_file: FIT file object
|
||||
|
||||
Returns:
|
||||
Dictionary with session data
|
||||
"""
|
||||
try:
|
||||
sessions = list(fit_file.get_messages('session'))
|
||||
if not sessions:
|
||||
return None
|
||||
|
||||
session = sessions[0]
|
||||
data = {}
|
||||
|
||||
for field in session:
|
||||
if field.name and field.value is not None:
|
||||
data[field.name] = field.value
|
||||
|
||||
return data
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to extract session data: {e}")
|
||||
return None
|
||||
|
||||
def _fit_records_to_dataframe(self, records) -> pd.DataFrame:
|
||||
"""Convert FIT records to pandas DataFrame.
|
||||
|
||||
Args:
|
||||
records: List of FIT record messages
|
||||
|
||||
Returns:
|
||||
DataFrame with workout data
|
||||
"""
|
||||
data = []
|
||||
|
||||
for record in records:
|
||||
record_data = {}
|
||||
for field in record:
|
||||
if field.name and field.value is not None:
|
||||
record_data[field.name] = field.value
|
||||
data.append(record_data)
|
||||
|
||||
if not data:
|
||||
return pd.DataFrame()
|
||||
|
||||
df = pd.DataFrame(data)
|
||||
|
||||
# Convert timestamp to datetime
|
||||
if 'timestamp' in df.columns:
|
||||
df['timestamp'] = pd.to_datetime(df['timestamp'])
|
||||
df = df.sort_values('timestamp')
|
||||
df = df.reset_index(drop=True)
|
||||
|
||||
return df
|
||||
|
||||
def _extract_power_data(self, df: pd.DataFrame) -> Optional[PowerData]:
|
||||
"""Extract power data from DataFrame.
|
||||
|
||||
Args:
|
||||
df: DataFrame with workout data
|
||||
|
||||
Returns:
|
||||
PowerData object or None
|
||||
"""
|
||||
if 'power' not in df.columns:
|
||||
return None
|
||||
|
||||
power_values = df['power'].dropna().tolist()
|
||||
if not power_values:
|
||||
return None
|
||||
|
||||
return PowerData(
|
||||
power_values=power_values,
|
||||
estimated_power=[], # Will be calculated later
|
||||
power_zones={}
|
||||
)
|
||||
|
||||
def _extract_heart_rate_data(self, df: pd.DataFrame) -> Optional[HeartRateData]:
|
||||
"""Extract heart rate data from DataFrame.
|
||||
|
||||
Args:
|
||||
df: DataFrame with workout data
|
||||
|
||||
Returns:
|
||||
HeartRateData object or None
|
||||
"""
|
||||
if 'heart_rate' not in df.columns:
|
||||
return None
|
||||
|
||||
hr_values = df['heart_rate'].dropna().tolist()
|
||||
if not hr_values:
|
||||
return None
|
||||
|
||||
return HeartRateData(
|
||||
heart_rate_values=hr_values,
|
||||
hr_zones={},
|
||||
avg_hr=np.mean(hr_values),
|
||||
max_hr=np.max(hr_values)
|
||||
)
|
||||
|
||||
def _extract_speed_data(self, df: pd.DataFrame) -> Optional[SpeedData]:
|
||||
"""Extract speed data from DataFrame.
|
||||
|
||||
Args:
|
||||
df: DataFrame with workout data
|
||||
|
||||
Returns:
|
||||
SpeedData object or None
|
||||
"""
|
||||
if 'speed' not in df.columns:
|
||||
return None
|
||||
|
||||
speed_values = df['speed'].dropna().tolist()
|
||||
if not speed_values:
|
||||
return None
|
||||
|
||||
# Convert m/s to km/h if needed
|
||||
if max(speed_values) < 50: # Likely m/s
|
||||
speed_values = [s * 3.6 for s in speed_values]
|
||||
|
||||
# Calculate distance if available
|
||||
distance_values = []
|
||||
if 'distance' in df.columns:
|
||||
distance_values = df['distance'].dropna().tolist()
|
||||
# Convert to km if in meters
|
||||
if distance_values and max(distance_values) > 1000:
|
||||
distance_values = [d / 1000 for d in distance_values]
|
||||
|
||||
return SpeedData(
|
||||
speed_values=speed_values,
|
||||
distance_values=distance_values,
|
||||
avg_speed=np.mean(speed_values),
|
||||
max_speed=np.max(speed_values),
|
||||
total_distance=distance_values[-1] if distance_values else None
|
||||
)
|
||||
|
||||
def _extract_elevation_data(self, df: pd.DataFrame) -> Optional[ElevationData]:
|
||||
"""Extract elevation data from DataFrame.
|
||||
|
||||
Args:
|
||||
df: DataFrame with workout data
|
||||
|
||||
Returns:
|
||||
ElevationData object or None
|
||||
"""
|
||||
if 'altitude' not in df.columns and 'elevation' not in df.columns:
|
||||
return None
|
||||
|
||||
# Use 'altitude' or 'elevation' column
|
||||
elevation_col = 'altitude' if 'altitude' in df.columns else 'elevation'
|
||||
elevation_values = df[elevation_col].dropna().tolist()
|
||||
|
||||
if not elevation_values:
|
||||
return None
|
||||
|
||||
# Calculate gradients
|
||||
gradient_values = self._calculate_gradients(elevation_values)
|
||||
|
||||
return ElevationData(
|
||||
elevation_values=elevation_values,
|
||||
gradient_values=gradient_values,
|
||||
elevation_gain=max(elevation_values) - min(elevation_values),
|
||||
elevation_loss=0, # Will be calculated more accurately
|
||||
max_gradient=np.max(gradient_values),
|
||||
min_gradient=np.min(gradient_values)
|
||||
)
|
||||
|
||||
def _extract_gear_data(self, df: pd.DataFrame) -> Optional[GearData]:
|
||||
"""Extract gear data from DataFrame.
|
||||
|
||||
Args:
|
||||
df: DataFrame with workout data
|
||||
|
||||
Returns:
|
||||
GearData object or None
|
||||
"""
|
||||
if 'cadence' not in df.columns:
|
||||
return None
|
||||
|
||||
cadence_values = df['cadence'].dropna().tolist()
|
||||
if not cadence_values:
|
||||
return None
|
||||
|
||||
return GearData(
|
||||
gear_ratios=[],
|
||||
cadence_values=cadence_values,
|
||||
estimated_gear=[],
|
||||
chainring_teeth=38, # Default
|
||||
cassette_teeth=[14, 16, 18, 20]
|
||||
)
|
||||
|
||||
def _calculate_gradients(self, elevation_values: List[float]) -> List[float]:
|
||||
"""Calculate gradients from elevation data.
|
||||
|
||||
Args:
|
||||
elevation_values: List of elevation values in meters
|
||||
|
||||
Returns:
|
||||
List of gradient values in percent
|
||||
"""
|
||||
if len(elevation_values) < 2:
|
||||
return [0.0] * len(elevation_values)
|
||||
|
||||
gradients = [0.0] # First point has no gradient
|
||||
|
||||
for i in range(1, len(elevation_values)):
|
||||
elevation_diff = elevation_values[i] - elevation_values[i-1]
|
||||
# Assume 10m distance between points for gradient calculation
|
||||
distance = 10.0
|
||||
gradient = (elevation_diff / distance) * 100
|
||||
gradients.append(gradient)
|
||||
|
||||
return gradients
|
||||
|
||||
def _parse_tcx(self, file_path: Path) -> Optional[WorkoutData]:
|
||||
"""Parse TCX file format.
|
||||
|
||||
Args:
|
||||
file_path: Path to TCX file
|
||||
|
||||
Returns:
|
||||
WorkoutData object or None if parsing failed
|
||||
"""
|
||||
logger.warning("TCX parser not implemented yet")
|
||||
return None
|
||||
|
||||
def _parse_gpx(self, file_path: Path) -> Optional[WorkoutData]:
|
||||
"""Parse GPX file format.
|
||||
|
||||
Args:
|
||||
file_path: Path to GPX file
|
||||
|
||||
Returns:
|
||||
WorkoutData object or None if parsing failed
|
||||
"""
|
||||
logger.warning("GPX parser not implemented yet")
|
||||
return None
|
||||
Reference in New Issue
Block a user