added LLM data extraction
docker compose up --build -d --force-recreate; docker compose logs -f

2026-03-18 19:25:28 +00:00 · 2025-10-05 06:22:14 -07:00
parent 2f1bbefb94
commit 8d80431850
19 changed files with 937 additions and 24 deletions
--- a/app/api/routes/__init__.py
+++ b/app/api/routes/__init__.py
@@ -0,0 +1,26 @@
+from fastapi import APIRouter
+
+from app.api.routes import (
+    admin,
+    charts,
+    export,
+    foods,
+    llm,
+    meals,
+    plans,
+    templates,
+    tracker,
+    weekly_menu,
+)
+
+# Build the aggregate router. Each route module exposes a ``router`` that is
+# mounted under a tag of the same name; the order below matches the order in
+# which the modules were originally registered.
+api_router = APIRouter()
+for _route_module, _tag in (
+    (tracker, "tracker"),
+    (foods, "foods"),
+    (meals, "meals"),
+    (templates, "templates"),
+    (charts, "charts"),
+    (admin, "admin"),
+    (weekly_menu, "weekly_menu"),
+    (plans, "plans"),
+    (export, "export"),
+    (llm, "llm"),
+):
+    api_router.include_router(_route_module.router, tags=[_tag])
--- a/app/api/routes/admin.py
+++ b/app/api/routes/admin.py
@@ -6,13 +6,21 @@ import shutil
 import sqlite3
 import logging
 from datetime import datetime
+from typing import Optional

 # Import from the database module
 from app.database import get_db, DATABASE_URL, engine
 from main import templates
+from app.models.llm_config import LLMConfig
+from pydantic import BaseModel

 router = APIRouter()

+class LLMConfigUpdate(BaseModel):
+    """Schema describing an update to the stored LLM configuration.
+
+    Mirrors the fields accepted as form data by ``update_llm_config``.
+    NOTE(review): not referenced by any route visible in this change;
+    confirm whether it is used elsewhere or can be removed.
+    """
+    # OpenRouter API key; optional, defaults to None.
+    openrouter_api_key: Optional[str] = None
+    # Model identifier to use for LLM requests; required.
+    preferred_model: str
+    # Browserless API key; optional, defaults to None.
+    browserless_api_key: Optional[str] = None
+
 def backup_database(source_db_path, backup_db_path):
    """Backs up an SQLite database using the online backup API."""
    logging.info(f"DEBUG: Starting backup - source: {source_db_path}, backup: {backup_db_path}")
@@ -81,6 +89,56 @@ async def admin_page(request: Request):
 async def admin_imports_page(request: Request):
    return templates.TemplateResponse(request, "admin/imports.html", {"request": request})

+@router.get("/admin/llm_config", response_class=HTMLResponse)
+async def admin_llm_config_page(request: Request, db: Session = Depends(get_db)):
+    """Render the admin page for viewing and editing the LLM configuration.
+
+    Loads the first ``LLMConfig`` row. When none exists yet, a new row is
+    created, committed and refreshed so the template always receives an
+    object. The row is passed to ``admin/llm_config.html`` as ``llm_config``.
+
+    Raises:
+        Exception: any error raised while querying or rendering is logged
+            with its traceback and re-raised unchanged.
+    """
+    try:
+        llm_config = db.query(LLMConfig).first()
+        if llm_config is None:
+            # First visit: persist a row so there is something to render.
+            logging.info("No LLMConfig found, creating new one")
+            llm_config = LLMConfig()
+            db.add(llm_config)
+            db.commit()
+            db.refresh(llm_config)
+
+        return templates.TemplateResponse(
+            request,
+            "admin/llm_config.html",
+            {"request": request, "llm_config": llm_config},
+        )
+    except Exception:
+        # logging.exception records the active traceback at ERROR level.
+        logging.exception("Error in llm_config route")
+        raise
+
+@router.post("/admin/llm_config", response_class=RedirectResponse)
+async def update_llm_config(
+    request: Request,
+    openrouter_api_key: Optional[str] = Form(None),
+    preferred_model: str = Form(...),
+    browserless_api_key: Optional[str] = Form(None),
+    db: Session = Depends(get_db)
+):
+    """Persist the LLM configuration submitted from the admin form.
+
+    Updates the first ``LLMConfig`` row, creating it when none exists, and
+    then redirects back to the configuration page with HTTP 303.
+
+    Args:
+        request: Incoming request (unused; kept for interface stability).
+        openrouter_api_key: New OpenRouter key. When not submitted it is
+            ``None`` and replaces any previously stored value.
+        preferred_model: Model identifier to store; required form field.
+        browserless_api_key: New Browserless key. When not submitted it is
+            ``None`` and replaces any previously stored value.
+        db: Database session.
+    """
+    llm_config = db.query(LLMConfig).first()
+    if llm_config is None:
+        # Add the new row to the session only; it is populated below and
+        # written in the same commit, so no empty row is ever persisted.
+        llm_config = LLMConfig()
+        db.add(llm_config)
+
+    llm_config.openrouter_api_key = openrouter_api_key
+    llm_config.preferred_model = preferred_model
+    llm_config.browserless_api_key = browserless_api_key
+    db.commit()
+
+    return RedirectResponse(url="/admin/llm_config", status_code=303)
+
@router.get("/admin/backups", response_class=HTMLResponse)
 async def admin_backups_page(request: Request):
    BACKUP_DIR = "./backups"
--- a/app/api/routes/llm.py
+++ b/app/api/routes/llm.py
@@ -0,0 +1,167 @@
+import base64
+import json
+import logging
+import os
+from logging.config import fileConfig
+from typing import Optional
+
+import httpx
+from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, File, Form
+from fastapi.responses import HTMLResponse
+from openai import OpenAI
+from pydantic import BaseModel, Field
+from sqlalchemy.orm import Session
+
+from app.core.config import templates
+from app.database import get_db
+from app.models.llm_config import LLMConfig
+
+router = APIRouter()
+
+@router.get("/llm", response_class=HTMLResponse, include_in_schema=False)
+async def llm_food_extractor_page(request: Request):
+    """Render the LLM food extractor page (``llm_food_extractor.html``).
+
+    The route is excluded from the generated API schema.
+    """
+    # Pass the request as the first argument, matching the other template
+    # routes in this change; the name-first call form is deprecated in
+    # Starlette.
+    return templates.TemplateResponse(request, "llm_food_extractor.html", {"request": request})
+
+class FoodItem(BaseModel):
+    """Nutrition facts for one food item, as returned by ``POST /llm/extract``.
+
+    All values describe the serving size stated in ``serving_size_g`` (they
+    are not normalised to 100 g). Every field except ``serving_size_g`` is
+    optional and defaults to ``None``.
+    """
+    name: Optional[str] = Field(None, description="Name of the food item")
+    brand: Optional[str] = Field(None, description="Brand name of the food item")
+    # Required: the only field without a default.
+    # NOTE(review): the extraction prompt tells the model to use null for
+    # unavailable values; confirm whether a null serving size should be
+    # accepted here instead of failing validation.
+    serving_size_g: float = Field(description="Actual serving size in grams as labeled on the page")
+    calories: Optional[int] = Field(None, description="Calories per actual serving")
+    protein_g: Optional[float] = Field(None, description="Protein in grams per actual serving")
+    carbohydrate_g: Optional[float] = Field(None, description="Carbohydrates in grams per actual serving")
+    fat_g: Optional[float] = Field(None, description="Fat in grams per actual serving")
+    fiber_g: Optional[float] = Field(None, description="Fiber in grams per actual serving")
+    sugar_g: Optional[float] = Field(None, description="Sugar in grams per actual serving")
+    # Minerals and cholesterol are expressed in milligrams, as integers.
+    sodium_mg: Optional[int] = Field(None, description="Sodium in milligrams per actual serving")
+    calcium_mg: Optional[int] = Field(None, description="Calcium in milligrams per actual serving")
+    potassium_mg: Optional[int] = Field(None, description="Potassium in milligrams per actual serving")
+    cholesterol_mg: Optional[int] = Field(None, description="Cholesterol in milligrams per actual serving")
+
+@router.post("/llm/extract", response_model=FoodItem)
+async def extract_food_data_from_llm(
+    request: Request,
+    url: Optional[str] = Form(None),
+    webpage_url: Optional[str] = Form(None),
+    image: Optional[UploadFile] = File(None),
+    db: Session = Depends(get_db)
+):
+    """Extract nutrition facts for one food item using an LLM via OpenRouter.
+
+    Exactly one input source is used, checked in this order:
+
+    1. ``url`` - sent to the model as an image URL.
+    2. ``webpage_url`` - page content is fetched through the Browserless
+       ``/content`` endpoint and sent to the model as text.
+    3. ``image`` - the uploaded file is sent inline as a base64 data URL.
+
+    The assembled messages are also written, best effort, to
+    ``/app/data/llmprompt.txt`` for debugging.
+
+    Args:
+        request: Incoming request (unused; kept for interface stability).
+        url: URL of an image of the nutrition label.
+        webpage_url: URL of a web page containing the nutrition data.
+        image: Uploaded image of the nutrition label.
+        db: Database session used to load the stored ``LLMConfig``.
+
+    Returns:
+        FoodItem: built from the JSON object returned by the model.
+
+    Raises:
+        HTTPException: 500 when the OpenRouter key is missing, when the
+            Browserless key is missing for a ``webpage_url`` request, or when
+            the LLM call, JSON parsing or validation fails; 400 when no input
+            is provided or the web page cannot be fetched.
+    """
+    # Imported locally so this handler is self-contained with respect to the
+    # module's import block.
+    from fastapi.concurrency import run_in_threadpool
+
+    logging.info("Starting food data extraction from LLM.")
+    llm_config = db.query(LLMConfig).first()
+    if not llm_config or not llm_config.openrouter_api_key:
+        logging.error("OpenRouter API key not configured.")
+        raise HTTPException(
+            status_code=500,
+            detail="OpenRouter API key not configured. Please configure it in the Admin section."
+        )
+    logging.info(f"LLM config loaded: preferred_model={llm_config.preferred_model}")
+
+    # LLM prompt for extracting nutrition data from webpage content or images.
+    # Units: serving_size_g in grams; nutrition values per actual serving size (not normalized to 100g).
+    # All nutrition fields are in grams except sodium_mg, calcium_mg, potassium_mg, cholesterol_mg in milligrams.
+    prompt = """You are a nutrition data extractor. Your task is to analyze the provided information (image or website content) and extract the nutritional information for the food item. The output must be a single JSON object that conforms to the following schema. All nutritional values should be for the actual serving size as labeled on the page (e.g., if the page says "per 1 cup (240g)", use values for 240g serving).
+
+     JSON Schema:
+     {
+       "name": "string",
+       "brand": "string",
+       "serving_size_g": "float",
+       "calories": "integer",
+       "protein_g": "float",
+       "carbohydrate_g": "float",
+       "fat_g": "float",
+       "fiber_g": "float",
+       "sugar_g": "float",
+       "sodium_mg": "integer",
+       "calcium_mg": "integer",
+       "potassium_mg": "integer",
+       "cholesterol_mg": "integer"
+     }
+
+     The food name is usually the most prominent header or title on the page. Brand is the manufacturer or brand name if available. serving_size_g should be the actual grams for the serving size shown (e.g., 240 for 1 cup). If the food name is not available, set it to "unknown". If any of the nutritional values are not available, set them to null. Do not include any text or explanations outside of the JSON object in your response.
+     """
+    messages = [{"role": "system", "content": prompt}]
+
+    content = []
+    if url:
+        logging.info(f"Processing image from URL: {url}")
+        content.append({"type": "image_url", "image_url": {"url": url}})
+    elif webpage_url:
+        # Browserless is only needed for this input path, so its key is
+        # validated here rather than for every request.
+        if not llm_config.browserless_api_key:
+            logging.error("Browserless API key not configured.")
+            raise HTTPException(
+                status_code=500,
+                detail="Browserless API key not configured. Please configure it in the Admin section."
+            )
+        logging.info(f"Processing content from webpage URL: {webpage_url}")
+        # The token is sent as a query parameter instead of being formatted
+        # into the URL string, so the logged endpoint never contains it.
+        browserless_endpoint = "https://production-sfo.browserless.io/content"
+        headers = {
+            "Cache-Control": "no-cache",
+            "Content-Type": "application/json"
+        }
+        try:
+            async with httpx.AsyncClient() as http_client:
+                logging.info(f"Fetching content from Browserless API (POST): {browserless_endpoint} with payload url={webpage_url}")
+                page_response = await http_client.post(
+                    browserless_endpoint,
+                    params={"token": llm_config.browserless_api_key},
+                    headers=headers,
+                    json={"url": webpage_url},
+                    timeout=30.0,
+                )
+                logging.info(f"Browserless response status={page_response.status_code}, content_length={len(page_response.text)}")
+                page_response.raise_for_status()
+        except httpx.HTTPStatusError as e:
+            status = e.response.status_code
+            body = e.response.text
+            # No exc_info here: the status-error message embeds the request
+            # URL, whose query string carries the API token.
+            logging.error(f"Browserless HTTP error status={status}, body_snippet={body[:500]}")
+            raise HTTPException(status_code=400, detail=f"Browserless HTTP {status}: unable to fetch webpage content") from e
+        except httpx.HTTPError as e:
+            logging.error(f"HTTP client error while fetching webpage content: {e}", exc_info=True)
+            raise HTTPException(status_code=400, detail=f"Could not fetch webpage content: {e}") from e
+        content.append({"type": "text", "text": f"Extract nutritional data from this webpage content: {page_response.text}"})
+        logging.info("Successfully fetched webpage content.")
+    elif image:
+        logging.info(f"Processing uploaded image: {image.filename}")
+        image_data = await image.read()
+        base64_image = base64.b64encode(image_data).decode("utf-8")
+        # Label the data URL with the upload's own MIME type; fall back to
+        # PNG (the previously hard-coded value) when the client sent none.
+        mime_type = image.content_type or "image/png"
+        content.append({
+            "type": "image_url",
+            "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}
+        })
+        logging.info("Successfully processed uploaded image.")
+    else:
+        logging.error("No input provided. Either a URL, a webpage URL, or an image is required.")
+        raise HTTPException(status_code=400, detail="Either a URL, a webpage URL, or an image must be provided.")
+
+    messages.append({"role": "user", "content": content})
+    # Log only a summary: the full payload may hold a whole web page or a
+    # base64-encoded image. The complete prompt is dumped to a file below.
+    logging.info(f"LLM prompt prepared: {len(messages)} messages, {len(content)} user content part(s)")
+    try:
+        os.makedirs("/app/data", exist_ok=True)
+        with open("/app/data/llmprompt.txt", "wt", encoding="utf-8") as file:
+            file.write(json.dumps(messages, indent=2))
+        logging.info("Wrote LLM prompt to /app/data/llmprompt.txt")
+    except Exception as e:
+        # Best-effort debugging aid; never fail the request because of it.
+        logging.warning(f"Could not write LLM prompt file: {e}", exc_info=True)
+
+    try:
+        openai_client = OpenAI(
+            base_url="https://openrouter.ai/api/v1",
+            api_key=llm_config.openrouter_api_key
+        )
+        logging.info(f"Sending request to LLM with model: {llm_config.preferred_model}")
+        # The OpenAI client used here is synchronous; run the call in a
+        # worker thread so the event loop keeps serving other requests.
+        completion = await run_in_threadpool(
+            openai_client.chat.completions.create,
+            model=llm_config.preferred_model,
+            messages=messages,
+            response_format={"type": "json_object"},
+        )
+        food_data_str = completion.choices[0].message.content
+        logging.info(f"LLM response: {food_data_str}")
+        food_data = json.loads(food_data_str)
+        logging.info("Successfully parsed LLM response.")
+        # Debug logs for serving size: trace actual serving_size_g from LLM, no rescaling applied
+        serving_size_g = food_data.get('serving_size_g')
+        logging.info(f"Extracted serving_size_g: {serving_size_g}g (actual serving size, no normalization to 100g)")
+        return FoodItem(**food_data)
+    except Exception as e:
+        logging.error(f"Error during LLM data extraction: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Error extracting food data: {e}") from e
--- a/app/core/config.py
+++ b/app/core/config.py
@@ -1,3 +1,24 @@
-from fastapi.templating import Jinja2Templates
+from functools import lru_cache
+from typing import Optional

-templates = Jinja2Templates(directory="templates")
+from fastapi.templating import Jinja2Templates
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+templates = Jinja2Templates(directory="templates")
+
+class Settings(BaseSettings):
+    """
+    Application settings.
+
+    Configured with ``env_file=".env"`` and ``extra="ignore"``. None of the
+    four fields below declares a default value.
+    """
+    model_config = SettingsConfigDict(env_file=".env", extra="ignore")
+
+    # Database connection URL.
+    DATABASE_URL: str
+    # NOTE(review): SECRET_KEY / ALGORITHM / ACCESS_TOKEN_EXPIRE_MINUTES look
+    # like token-signing settings; their consumers are not visible here.
+    SECRET_KEY: str
+    ALGORITHM: str
+    ACCESS_TOKEN_EXPIRE_MINUTES: int
+
+
+@lru_cache()
+def get_settings() -> Settings:
+    """Return the application ``Settings``.
+
+    The result is memoised by ``lru_cache``, so ``Settings()`` is constructed
+    on the first call and the same instance is returned afterwards.
+    """
+    return Settings()
--- a/app/database.py
+++ b/app/database.py
@@ -35,6 +35,9 @@ engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False} i
 SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
 Base = declarative_base()

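+# Import models defined outside this module so they are registered with Base.
+# This import must stay below the Base definition: app.models.llm_config
+# imports Base from this module.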
+from app.models.llm_config import LLMConfig
+
 # Database Models
 class Food(Base):
    __tablename__ = "foods"
--- a/app/models/llm_config.py
+++ b/app/models/llm_config.py
@@ -0,0 +1,10 @@
+from sqlalchemy import Column, Integer, String
+from app.database import Base
+
+class LLMConfig(Base):
+    """ORM model for the LLM integration settings (table ``llm_configs``).
+
+    The routes in this change only ever read or update the first row.
+    """
+    __tablename__ = "llm_configs"
+
+    id = Column(Integer, primary_key=True, index=True)
+    # OpenRouter API key, stored as a plain string column; may be NULL.
+    openrouter_api_key = Column(String, nullable=True)
+    # Model identifier used for LLM requests; defaults to Claude 3.5 Sonnet.
+    preferred_model = Column(String, default="anthropic/claude-3.5-sonnet")
+    # Browserless API key, stored as a plain string column; may be NULL.
+    browserless_api_key = Column(String, nullable=True)