working - checkpoint 2

2025-08-24 07:44:32 -07:00
parent 2f5db981a4
commit 1754775f4c
10 changed files with 1769 additions and 1372 deletions

View File

@@ -1,37 +1,57 @@
import os
import signal
import sys
import threading
import asyncio
import concurrent.futures
import time
from datetime import datetime
from queue import Empty, PriorityQueue
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
from .database import Activity, DaemonConfig, SyncLog, get_legacy_session, init_db, get_offline_stats
from .garmin import GarminClient
from .utils import logger
from .activity_parser import get_activity_metrics
# Priority levels: 1=High (API requests), 2=Medium (Sync jobs), 3=Low (Reprocessing)
PRIORITY_HIGH = 1
PRIORITY_MEDIUM = 2
PRIORITY_LOW = 3
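A minimal sketch (not part of this commit) of the ordering these constants rely on: queue.PriorityQueue pops the smallest tuple first, so priority 1 beats 2 beats 3. The itertools counter is a hypothetical tiebreaker worth considering, since two entries with equal priority and equal task type would otherwise fall through to comparing payloads, which may not be orderable (e.g., two dict-carrying API tasks).

import itertools
from queue import PriorityQueue

PRIORITY_HIGH, PRIORITY_MEDIUM, PRIORITY_LOW = 1, 2, 3
_seq = itertools.count()  # hypothetical monotonic tiebreaker

q = PriorityQueue()
q.put((PRIORITY_LOW, next(_seq), ("reprocess", None)))
q.put((PRIORITY_HIGH, next(_seq), ("api", {"path": "/stats"})))
q.put((PRIORITY_MEDIUM, next(_seq), ("sync", None)))

while not q.empty():
    priority, _, task = q.get()
    print(priority, task)  # prints the "api" task first, then "sync", then "reprocess"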
class GarminSyncDaemon:
def __init__(self):
self.scheduler = BackgroundScheduler()
self.running = False
self.web_server = None
# Process pool for CPU-bound tasks
self.executor = concurrent.futures.ProcessPoolExecutor(
max_workers=max(1, (os.cpu_count() or 2) - 1)  # os.cpu_count() can return None
)
# Priority queue for task scheduling
self.task_queue = PriorityQueue()
# Worker thread for processing tasks
self.worker_thread = threading.Thread(target=self._process_tasks, daemon=True)
# Lock for database access during migration
self.db_lock = threading.Lock()
def start(self, web_port=8888, run_migrations=True):
"""Start daemon with scheduler and web UI
:param web_port: Port for the web UI
:param run_migrations: Whether to run database migrations on startup
"""
try:
# Initialize database (init_db is now a coroutine; drive it to completion here)
with self.db_lock:
asyncio.run(init_db())
# Set migration flag for entrypoint
if run_migrations:
os.environ['RUN_MIGRATIONS'] = "1"
else:
os.environ['RUN_MIGRATIONS'] = "0"
# Mark the daemon as running before the worker starts, or its loop exits immediately
self.running = True
# Start task processing worker
self.worker_thread.start()
# Load configuration from database
config_data = self.load_config()
@@ -48,7 +68,7 @@ class GarminSyncDaemon:
cron_str = "0 */6 * * *"
self.scheduler.add_job(
func=self._enqueue_sync,
trigger=CronTrigger.from_crontab(cron_str),
id="sync_job",
replace_existing=True,
@@ -58,7 +78,7 @@ class GarminSyncDaemon:
logger.error(f"Failed to create sync job: {str(e)}")
# Fallback to default schedule
self.scheduler.add_job(
func=self._enqueue_sync,
trigger=CronTrigger.from_crontab("0 */6 * * *"),
id="sync_job",
replace_existing=True,
@@ -66,10 +86,10 @@ class GarminSyncDaemon:
logger.info("Using default schedule for sync job: '0 */6 * * *'")
# Reprocess job - run daily at 2 AM
reprocess_cron = "0 2 * * *" # Daily at 2 AM
reprocess_cron = "0 2 * * *"
try:
self.scheduler.add_job(
func=self._enqueue_reprocess,
trigger=CronTrigger.from_crontab(reprocess_cron),
id="reprocess_job",
replace_existing=True,
@@ -77,16 +97,6 @@ class GarminSyncDaemon:
logger.info(f"Reprocess job scheduled with cron: '{reprocess_cron}'")
except Exception as e:
logger.error(f"Failed to create reprocess job: {str(e)}")
# Start scheduler
self.scheduler.start()
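For reference, a standalone sketch of the scheduling pattern used here, assuming only APScheduler is installed; the lambda stands in for _enqueue_sync. The crontab string "0 */6 * * *" means minute 0 of every sixth hour (00:00, 06:00, 12:00, 18:00).

from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger

scheduler = BackgroundScheduler()
scheduler.add_job(
    func=lambda: print("sync tick"),  # stand-in for _enqueue_sync
    trigger=CronTrigger.from_crontab("0 */6 * * *"),
    id="sync_job",
    replace_existing=True,
)
scheduler.start()
# Inspect when the job will fire next (APScheduler 3.x Job attribute)
print(scheduler.get_job("sync_job").next_run_time)
scheduler.shutdown()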
@@ -115,8 +125,52 @@ class GarminSyncDaemon:
self.update_daemon_status("error")
self.stop()
def _enqueue_sync(self):
"""Enqueue sync job with medium priority"""
self.task_queue.put((PRIORITY_MEDIUM, ("sync", None)))
logger.debug("Enqueued sync job")
def _enqueue_reprocess(self):
"""Enqueue reprocess job with low priority"""
self.task_queue.put((PRIORITY_LOW, ("reprocess", None)))
logger.debug("Enqueued reprocess job")
def _process_tasks(self):
"""Worker thread to process tasks from the priority queue"""
logger.info("Task worker started")
while self.running:
try:
priority, (task_type, data) = self.task_queue.get(timeout=1)
except Empty:
# queue.get(timeout=...) raises queue.Empty, not asyncio.TimeoutError;
# an empty queue is normal, so just poll again
continue
try:
logger.info(f"Processing {task_type} task (priority {priority})")
if task_type == "sync":
self._execute_in_process_pool(self.sync_and_download)
elif task_type == "reprocess":
self._execute_in_process_pool(self.reprocess_activities)
elif task_type == "api":
# Placeholder for high-priority API tasks
logger.debug(f"Processing API task: {data}")
except Exception as e:
logger.error(f"Task processing error: {str(e)}")
finally:
self.task_queue.task_done()
logger.info("Task worker stopped")
def _execute_in_process_pool(self, func):
"""Execute function in process pool and handle results"""
try:
future = self.executor.submit(func)
# Blocks this worker thread until the task finishes (preserving order) without blocking the main thread
result = future.result()
logger.debug(f"Process pool task completed: {result}")
except Exception as e:
logger.error(f"Process pool task failed: {str(e)}")
def sync_and_download(self):
"""Scheduled job function"""
"""Scheduled job function (run in process pool)"""
session = None
try:
self.log_operation("sync", "started")
@@ -129,11 +183,12 @@ class GarminSyncDaemon:
client = GarminClient()
# Sync database first
with self.db_lock:
# sync_database is now a coroutine; run it to completion inside this worker
asyncio.run(sync_database(client))
# Download missing activities
downloaded_count = 0
session = get_legacy_session()
missing_activities = (
session.query(Activity).filter_by(downloaded=False).all()
)
@@ -165,11 +220,11 @@ class GarminSyncDaemon:
if metrics:
# Update metrics if available
activity.activity_type = metrics.get("activityType", {}).get("typeKey")
activity.duration = int(float(metrics.get("summaryDTO", {}).get("duration", 0)))
activity.distance = float(metrics.get("summaryDTO", {}).get("distance", 0))
activity.max_heart_rate = int(float(metrics.get("summaryDTO", {}).get("maxHR", 0)))
activity.avg_power = float(metrics.get("summaryDTO", {}).get("avgPower", 0))
activity.calories = int(float(metrics.get("summaryDTO", {}).get("calories", 0)))
activity.duration = int(float(metrics.get("duration", 0)))
activity.distance = float(metrics.get("distance", 0))
activity.max_heart_rate = int(float(metrics.get("maxHR", 0)))
activity.avg_power = float(metrics.get("avgPower", 0))
activity.calories = int(float(metrics.get("calories", 0)))
session.commit()
downloaded_count += 1
@@ -251,12 +306,28 @@ class GarminSyncDaemon:
"""Start FastAPI web server in a separate thread"""
try:
import uvicorn
from .web.app import app
# Add shutdown hook to stop worker thread
@app.on_event("shutdown")
def shutdown_event():
logger.info("Web server shutting down")
self.running = False
self.worker_thread.join(timeout=5)
def run_server():
try:
# Use async execution model for better concurrency
config = uvicorn.Config(
app,
host="0.0.0.0",
port=port,
log_level="info",
workers=1,
loop="asyncio"
)
server = uvicorn.Server(config)
server.run()
except Exception as e:
logger.error(f"Failed to start web server: {e}")

View File

@@ -1,15 +1,20 @@
"""Database module for GarminSync application."""
"""Database module for GarminSync application with async support."""
import os
from datetime import datetime
from contextlib import asynccontextmanager
from sqlalchemy import Boolean, Column, Float, Integer, String, create_engine, func
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
from sqlalchemy.ext.asyncio import async_sessionmaker
from sqlalchemy.future import select
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import declarative_base, sessionmaker
from sqlalchemy.orm import selectinload, joinedload
Base = declarative_base()
class Activity(Base):
"""Activity model representing a Garmin activity record."""
@@ -31,32 +36,24 @@ class Activity(Base):
last_sync = Column(String, nullable=True)
@classmethod
async def get_paginated(cls, db, page=1, per_page=10):
"""Get paginated list of activities (async).
Args:
db: async session factory (async_sessionmaker) to open a session with
page: Page number (1-based)
per_page: Number of items per page
Returns:
Dict with items, page, per_page, total, and pages
"""
async with db.begin() as session:
query = select(cls).order_by(cls.start_time.desc())
result = await session.execute(query.offset((page - 1) * per_page).limit(per_page))
activities = result.scalars().all()
count_result = await session.execute(select(func.count()).select_from(cls))
total = count_result.scalar_one()
return {
"items": activities,
"page": page,
"per_page": per_page,
"total": total,
"pages": (total + per_page - 1) // per_page
}
def to_dict(self):
"""Convert activity to dictionary representation.
Returns:
Dictionary with activity data
"""
"""Convert activity to dictionary representation."""
return {
"id": self.activity_id,
"name": self.filename or "Unnamed Activity",
@@ -83,6 +80,13 @@ class DaemonConfig(Base):
next_run = Column(String, nullable=True)
status = Column(String, default="stopped", nullable=False)
@classmethod
async def get(cls, db):
"""Get configuration record (async)."""
async with db.begin() as session:
result = await session.execute(select(cls))
return result.scalars().first()
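A minimal usage sketch for the async classmethods above, assuming init_db() has already run (e.g., in a startup hook) so the module-level async_session is populated; the routes themselves are hypothetical, not part of this commit.

from fastapi import FastAPI

from garminsync.database import Activity, DaemonConfig, async_session

app = FastAPI()

@app.get("/activities")
async def list_activities(page: int = 1, per_page: int = 10):
    data = await Activity.get_paginated(async_session, page=page, per_page=per_page)
    # ORM rows are not directly JSON-serializable; reuse to_dict()
    data["items"] = [a.to_dict() for a in data["items"]]
    return data

@app.get("/config")
async def read_config():
    cfg = await DaemonConfig.get(async_session)
    return {"status": cfg.status if cfg else "unknown"}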
class SyncLog(Base):
"""Sync log model for tracking sync operations."""
@@ -98,135 +102,133 @@ class SyncLog(Base):
activities_downloaded = Column(Integer, default=0, nullable=False)
# Database initialization and session management
engine = None
async_session = None
async def init_db():
"""Initialize database connection and create tables."""
global engine, async_session
db_path = os.getenv("DB_PATH", "data/garmin.db")
engine = create_async_engine(
f"sqlite+aiosqlite:///{db_path}",
pool_size=10,
max_overflow=20,
pool_pre_ping=True
)
async_session = async_sessionmaker(engine, expire_on_commit=False)
# Create tables if they don't exist
async with engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
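init_db() is now a coroutine, so synchronous callers (the daemon's start(), CLI entry points) have to drive it explicitly. A minimal sketch, assuming the aiosqlite driver is installed:

import asyncio

from garminsync.database import init_db

# One-time startup wiring: create the async engine and the tables
asyncio.run(init_db())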
@asynccontextmanager
async def get_db():
"""Async context manager for database sessions."""
async with async_session() as session:
try:
yield session
await session.commit()
except SQLAlchemyError:
await session.rollback()
raise
# Compatibility layer for legacy sync functions
def get_legacy_session():
"""Temporary synchronous session for migration purposes."""
db_path = os.getenv("DB_PATH", "data/garmin.db")
sync_engine = create_engine(f"sqlite:///{db_path}")
Base.metadata.create_all(sync_engine)
Session = sessionmaker(bind=sync_engine)
return Session()
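A small sketch of the intended split: converted code goes through get_db(), which commits on clean exit and rolls back on SQLAlchemyError, while unconverted sync paths keep using get_legacy_session(). Everything below the imports is illustrative, not part of the commit.

import asyncio

from sqlalchemy.future import select

from garminsync.database import Activity, get_db, get_legacy_session, init_db

async def count_missing_async():
    async with get_db() as session:
        result = await session.execute(select(Activity).filter_by(downloaded=False))
        return len(result.scalars().all())

def count_missing_legacy():
    # Unconverted call sites keep the old Session API during the migration
    session = get_legacy_session()
    try:
        return session.query(Activity).filter_by(downloaded=False).count()
    finally:
        session.close()

async def main():
    await init_db()
    print(await count_missing_async(), count_missing_legacy())

asyncio.run(main())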
async def sync_database(garmin_client):
"""Sync local database with Garmin Connect activities (async).
Args:
garmin_client: GarminClient instance for API communication
"""
from garminsync.activity_parser import get_activity_metrics
async with get_db() as session:
try:
activities = garmin_client.get_activities(0, 1000)
if not activities:
print("No activities returned from Garmin API")
return
for activity_data in activities:
if not isinstance(activity_data, dict):
print(f"Invalid activity data: {activity_data}")
continue
activity_id = activity_data.get("activityId")
start_time = activity_data.get("startTimeLocal")
if not activity_id or not start_time:
print(f"Missing required fields in activity: {activity_data}")
continue
result = await session.execute(
select(Activity).filter_by(activity_id=activity_id)
)
existing = result.scalars().first()
# Create or update basic activity info
if not existing:
activity = Activity(
activity_id=activity_id,
start_time=start_time,
downloaded=False,
created_at=datetime.now().isoformat(),
last_sync=datetime.now().isoformat(),
)
session.add(activity)
else:
activity = existing
# Update metrics using shared parser
metrics = get_activity_metrics(activity, garmin_client)
if metrics:
activity.activity_type = metrics.get("activityType", {}).get("typeKey")
# Extract duration in seconds
duration = metrics.get("summaryDTO", {}).get("duration")
if duration is not None:
activity.duration = int(float(duration))
# Extract distance in meters
distance = metrics.get("summaryDTO", {}).get("distance")
if distance is not None:
activity.distance = float(distance)
# Extract heart rates
max_hr = metrics.get("summaryDTO", {}).get("maxHR")
if max_hr is not None:
activity.max_heart_rate = int(float(max_hr))
avg_hr = metrics.get("summaryDTO", {}).get("avgHR", None) or \
metrics.get("summaryDTO", {}).get("averageHR", None)
if avg_hr is not None:
activity.avg_heart_rate = int(float(avg_hr))
# Extract power and calories
avg_power = metrics.get("summaryDTO", {}).get("avgPower")
if avg_power is not None:
activity.avg_power = float(avg_power)
calories = metrics.get("summaryDTO", {}).get("calories")
if calories is not None:
activity.calories = int(float(calories))
# Update sync timestamp
activity.last_sync = datetime.now().isoformat()
await session.commit()
except SQLAlchemyError as e:
await session.rollback()
raise e
async def get_offline_stats():
"""Return statistics about cached data without API calls (async).
Returns:
Dictionary with activity statistics
"""
async with get_db() as session:
try:
result = await session.execute(select(Activity))
total = len(result.scalars().all())
result = await session.execute(
select(Activity).filter_by(downloaded=True)
)
downloaded = len(result.scalars().all())
result = await session.execute(
select(Activity).order_by(Activity.last_sync.desc())
)
last_sync = result.scalars().first()
return {
"total": total,
"downloaded": downloaded,
"missing": total - downloaded,
"last_sync": last_sync.last_sync if last_sync else "Never synced",
}
except SQLAlchemyError as e:
print(f"Database error: {e}")
return {
"total": 0,
"downloaded": 0,
"missing": 0,
"last_sync": "Error"
}
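Finally, a usage sketch for the converted helper, plus a design note: loading every row with result.scalars().all() just to call len() transfers O(n) rows; select(func.count()).select_from(Activity), as in get_paginated above, would push the counting into SQLite instead.

import asyncio

from garminsync.database import get_offline_stats, init_db

async def main():
    await init_db()
    stats = await get_offline_stats()
    print(f"{stats['downloaded']}/{stats['total']} downloaded "
          f"({stats['missing']} missing), last sync: {stats['last_sync']}")

asyncio.run(main())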