Added LLM data extraction. Rebuild and follow the logs with: docker compose up --build -d --force-recreate; docker compose logs -f

This commit is contained in:
2025-10-05 06:22:14 -07:00
parent 2f1bbefb94
commit 8d80431850
19 changed files with 937 additions and 24 deletions

View File

@@ -0,0 +1,26 @@
from fastapi import APIRouter
from app.api.routes import (
admin,
charts,
export,
foods,
llm,
meals,
plans,
templates,
tracker,
weekly_menu,
)
# Aggregate router that mounts every feature module's router under its own tag.
api_router = APIRouter()

# (module, tag) pairs; modules are registered in the order listed.
_ROUTE_MODULES = (
    (tracker, "tracker"),
    (foods, "foods"),
    (meals, "meals"),
    (templates, "templates"),
    (charts, "charts"),
    (admin, "admin"),
    (weekly_menu, "weekly_menu"),
    (plans, "plans"),
    (export, "export"),
    (llm, "llm"),
)

for _module, _tag in _ROUTE_MODULES:
    api_router.include_router(_module.router, tags=[_tag])

View File

@@ -6,13 +6,21 @@ import shutil
import sqlite3
import logging
from datetime import datetime
from typing import Optional
# Import from the database module
from app.database import get_db, DATABASE_URL, engine
from main import templates
from app.models.llm_config import LLMConfig
from pydantic import BaseModel
router = APIRouter()
# Pydantic schema describing an update to the stored LLM configuration.
# NOTE(review): no code visible in this diff references this model; the POST
# handler for /admin/llm_config reads the same three fields as form
# parameters instead. Confirm whether this schema is still needed.
class LLMConfigUpdate(BaseModel):
    # OpenRouter API key; optional, defaults to None.
    openrouter_api_key: Optional[str] = None
    # Identifier of the model to use; required (no default).
    preferred_model: str
    # Browserless API key; optional, defaults to None.
    browserless_api_key: Optional[str] = None
def backup_database(source_db_path, backup_db_path):
"""Backs up an SQLite database using the online backup API."""
logging.info(f"DEBUG: Starting backup - source: {source_db_path}, backup: {backup_db_path}")
@@ -81,6 +89,56 @@ async def admin_page(request: Request):
async def admin_imports_page(request: Request):
return templates.TemplateResponse(request, "admin/imports.html", {"request": request})
@router.get("/admin/llm_config", response_class=HTMLResponse)
async def admin_llm_config_page(request: Request, db: Session = Depends(get_db)):
    """Render the admin page for the LLM configuration.

    Loads the first ``LLMConfig`` row and, when none exists, inserts a new
    one before rendering ``admin/llm_config.html`` with it. Any exception is
    logged together with its traceback and then re-raised.
    """
    logging.info("DEBUG: Starting llm_config route")
    try:
        config_row = db.query(LLMConfig).first()
        logging.info(f"DEBUG: LLMConfig query result: {config_row}")

        if not config_row:
            # Nothing stored yet: persist a fresh row so the template always
            # receives a configuration object.
            logging.info("DEBUG: No LLMConfig found, creating new one")
            config_row = LLMConfig()
            db.add(config_row)
            db.commit()
            db.refresh(config_row)
            logging.info(f"DEBUG: Created new LLMConfig: {config_row}")

        logging.info(f"DEBUG: Final llm_config object: {config_row}")
        logging.info("DEBUG: About to render llm_config.html template")
        template_context = {"request": request, "llm_config": config_row}
        page = templates.TemplateResponse(
            request,
            "admin/llm_config.html",
            template_context,
        )
        logging.info("DEBUG: Template rendered successfully")
        return page
    except Exception as exc:
        logging.error(f"DEBUG: Error in llm_config route: {exc}", exc_info=True)
        raise
@router.post("/admin/llm_config", response_class=RedirectResponse)
async def update_llm_config(
    request: Request,
    openrouter_api_key: Optional[str] = Form(None),
    preferred_model: str = Form(...),
    browserless_api_key: Optional[str] = Form(None),
    db: Session = Depends(get_db),
):
    """Store the submitted LLM configuration and redirect to the admin page.

    The first ``LLMConfig`` row is updated in place; when the table is empty
    a new row is created. All three fields are overwritten with the submitted
    values, so an omitted optional key is stored as ``None``.

    Args:
        request: Incoming request (unused; kept for the route signature).
        openrouter_api_key: OpenRouter API key from the form, or ``None``.
        preferred_model: Model identifier from the form (required).
        browserless_api_key: Browserless API key from the form, or ``None``.
        db: Database session supplied by ``get_db``.

    Returns:
        A 303 redirect to ``/admin/llm_config``.
    """
    llm_config = db.query(LLMConfig).first()
    if not llm_config:
        # Stage the new row only; it is written together with the submitted
        # values in the single commit below, so a blank row is never
        # persisted on its own.
        llm_config = LLMConfig()
        db.add(llm_config)

    llm_config.openrouter_api_key = openrouter_api_key
    llm_config.preferred_model = preferred_model
    llm_config.browserless_api_key = browserless_api_key
    db.commit()

    # The object is not used after the commit, so no refresh is needed.
    return RedirectResponse(url="/admin/llm_config", status_code=303)
@router.get("/admin/backups", response_class=HTMLResponse)
async def admin_backups_page(request: Request):
BACKUP_DIR = "./backups"

167
app/api/routes/llm.py Normal file
View File

@@ -0,0 +1,167 @@
import base64
import json
import logging
import os
from logging.config import fileConfig
from typing import Optional
import httpx
from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, File, Form
from fastapi.responses import HTMLResponse
from openai import OpenAI
from pydantic import BaseModel, Field
from sqlalchemy.orm import Session
from app.core.config import templates
from app.database import get_db
from app.models.llm_config import LLMConfig
router = APIRouter()
@router.get("/llm", response_class=HTMLResponse, include_in_schema=False)
async def llm_food_extractor_page(request: Request):
    """Serve the LLM food-extractor HTML page (hidden from the API schema)."""
    # Request-first call form, matching the admin routes; the name-first
    # form ``TemplateResponse(name, context)`` is deprecated in Starlette.
    return templates.TemplateResponse(
        request,
        "llm_food_extractor.html",
        {"request": request},
    )
# Response schema of POST /llm/extract: nutrition facts for a single food
# item, expressed per labeled serving (see the field descriptions).
# `serving_size_g` is the only required field; every other field defaults
# to None.
# NOTE(review): the extraction prompt tells the model to emit null for
# unavailable values; a null `serving_size_g` would fail validation here.
# Confirm whether that field is meant to be optional as well.
class FoodItem(BaseModel):
    name: Optional[str] = Field(None, description="Name of the food item")
    brand: Optional[str] = Field(None, description="Brand name of the food item")
    # Required: declared without a default value.
    serving_size_g: float = Field(description="Actual serving size in grams as labeled on the page")
    calories: Optional[int] = Field(None, description="Calories per actual serving")
    # Macronutrients, in grams.
    protein_g: Optional[float] = Field(None, description="Protein in grams per actual serving")
    carbohydrate_g: Optional[float] = Field(None, description="Carbohydrates in grams per actual serving")
    fat_g: Optional[float] = Field(None, description="Fat in grams per actual serving")
    fiber_g: Optional[float] = Field(None, description="Fiber in grams per actual serving")
    sugar_g: Optional[float] = Field(None, description="Sugar in grams per actual serving")
    # Milligram quantities, declared as integers.
    sodium_mg: Optional[int] = Field(None, description="Sodium in milligrams per actual serving")
    calcium_mg: Optional[int] = Field(None, description="Calcium in milligrams per actual serving")
    potassium_mg: Optional[int] = Field(None, description="Potassium in milligrams per actual serving")
    cholesterol_mg: Optional[int] = Field(None, description="Cholesterol in milligrams per actual serving")
@router.post("/llm/extract", response_model=FoodItem)
async def extract_food_data_from_llm(
    request: Request,
    url: Optional[str] = Form(None),
    webpage_url: Optional[str] = Form(None),
    image: Optional[UploadFile] = File(None),
    db: Session = Depends(get_db),
):
    """Extract nutrition facts for one food item using an OpenRouter-hosted LLM.

    Exactly one input source is used, checked in this order:

    1. ``url`` - passed to the model as an image URL.
    2. ``webpage_url`` - page content is fetched through the Browserless
       content API and passed to the model as text.
    3. ``image`` - the uploaded file is inlined as a base64 data URL.

    Args:
        request: Incoming request (unused; kept for the route signature).
        url: URL of an image to analyse.
        webpage_url: URL of a web page to analyse.
        image: Uploaded image file.
        db: Database session supplied by ``get_db``.

    Returns:
        A ``FoodItem`` built from the JSON object returned by the model.

    Raises:
        HTTPException: 500 when the OpenRouter key is missing, when the
            Browserless key is missing for a ``webpage_url`` request, or when
            the LLM call or response parsing fails; 400 when no input is
            given, the uploaded image is empty, or the page fetch fails.
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    logging.info("Starting food data extraction from LLM.")
    llm_config = db.query(LLMConfig).first()

    if not llm_config or not llm_config.openrouter_api_key:
        logging.error("OpenRouter API key not configured.")
        raise HTTPException(
            status_code=500,
            detail="OpenRouter API key not configured. Please configure it in the Admin section."
        )

    logging.info(f"LLM config loaded: preferred_model={llm_config.preferred_model}")

    # LLM prompt for extracting nutrition data from webpage content or images.
    # Units: serving_size_g in grams; nutrition values per actual serving size (not normalized to 100g).
    # All nutrition fields are in grams except sodium_mg, calcium_mg, potassium_mg, cholesterol_mg in milligrams.
    prompt = """You are a nutrition data extractor. Your task is to analyze the provided information (image or website content) and extract the nutritional information for the food item. The output must be a single JSON object that conforms to the following schema. All nutritional values should be for the actual serving size as labeled on the page (e.g., if the page says "per 1 cup (240g)", use values for 240g serving).
JSON Schema:
{
"name": "string",
"brand": "string",
"serving_size_g": "float",
"calories": "integer",
"protein_g": "float",
"carbohydrate_g": "float",
"fat_g": "float",
"fiber_g": "float",
"sugar_g": "float",
"sodium_mg": "integer",
"calcium_mg": "integer",
"potassium_mg": "integer",
"cholesterol_mg": "integer"
}
The food name is usually the most prominent header or title on the page. Brand is the manufacturer or brand name if available. serving_size_g should be the actual grams for the serving size shown (e.g., 240 for 1 cup). If the food name is not available, set it to "unknown". If any of the nutritional values are not available, set them to null. Do not include any text or explanations outside of the JSON object in your response.
"""
    messages = [{"role": "system", "content": prompt}]
    content = []

    if url:
        logging.info(f"Processing image from URL: {url}")
        content.append({"type": "image_url", "image_url": {"url": url}})
    elif webpage_url:
        # The Browserless key is only needed on this path, so it is checked
        # here; image-based requests work without it.
        if not llm_config.browserless_api_key:
            logging.error("Browserless API key not configured.")
            raise HTTPException(
                status_code=500,
                detail="Browserless API key not configured. Please configure it in the Admin section."
            )
        logging.info(f"Processing content from webpage URL: {webpage_url}")
        try:
            async with httpx.AsyncClient() as http_client:
                headers = {
                    "Cache-Control": "no-cache",
                    "Content-Type": "application/json"
                }
                payload = {"url": webpage_url}
                # The token travels as a query parameter and is deliberately
                # kept out of the log line.
                logging.info(f"Fetching content from Browserless API (POST) with payload url={webpage_url}")
                page_response = await http_client.post(
                    "https://production-sfo.browserless.io/content",
                    params={"token": llm_config.browserless_api_key},
                    headers=headers,
                    json=payload,
                    timeout=30.0,
                )
                logging.info(
                    f"Browserless response status={page_response.status_code}, "
                    f"content_length={len(page_response.text)}"
                )
                page_response.raise_for_status()
                content.append({
                    "type": "text",
                    "text": f"Extract nutritional data from this webpage content: {page_response.text}"
                })
                logging.info("Successfully fetched webpage content.")
        except httpx.HTTPStatusError as e:
            status = e.response.status_code
            body = e.response.text
            # No traceback and no exception chaining here: the exception
            # message embeds the request URL, which carries the API token.
            logging.error(f"Browserless HTTP error status={status}, body_snippet={body[:500]}")
            raise HTTPException(
                status_code=400,
                detail=f"Browserless HTTP {status}: unable to fetch webpage content"
            ) from None
        except httpx.HTTPError as e:
            logging.error(f"HTTP client error while fetching webpage content: {e}", exc_info=True)
            raise HTTPException(status_code=400, detail=f"Could not fetch webpage content: {e}") from e
    elif image:
        logging.info(f"Processing uploaded image: {image.filename}")
        image_data = await image.read()
        if not image_data:
            logging.error("Uploaded image is empty.")
            raise HTTPException(status_code=400, detail="Uploaded image is empty.")
        # Label the data URL with the upload's own media type; fall back to
        # PNG when the client sent none or a non-image type.
        declared_type = image.content_type or ""
        mime_type = declared_type if declared_type.startswith("image/") else "image/png"
        base64_image = base64.b64encode(image_data).decode("utf-8")
        content.append({
            "type": "image_url",
            "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}
        })
        logging.info("Successfully processed uploaded image.")
    else:
        logging.error("No input provided. Either a URL, a webpage URL, or an image is required.")
        raise HTTPException(status_code=400, detail="Either a URL, a webpage URL, or an image must be provided.")

    messages.append({"role": "user", "content": content})
    # Only a summary goes to the log: the payload may contain a whole web
    # page or a base64-encoded image. The full prompt is written to the file
    # below.
    logging.info(f"LLM prompt prepared: {len(messages)} messages, user content types={[part['type'] for part in content]}")

    # Best-effort debug dump of the full prompt; failure must not abort the request.
    try:
        os.makedirs("/app/data", exist_ok=True)
        with open("/app/data/llmprompt.txt", "wt", encoding="utf-8") as file:
            file.write(json.dumps(messages, indent=2))
        logging.info("Wrote LLM prompt to /app/data/llmprompt.txt")
    except Exception as e:
        logging.warning(f"Could not write LLM prompt file: {e}", exc_info=True)

    try:
        openai_client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=llm_config.openrouter_api_key
        )
        logging.info(f"Sending request to LLM with model: {llm_config.preferred_model}")
        # The OpenAI client is synchronous; run it in a worker thread so the
        # event loop is not blocked for the duration of the LLM call.
        completion = await run_in_threadpool(
            openai_client.chat.completions.create,
            model=llm_config.preferred_model,
            messages=messages,
            response_format={"type": "json_object"},
        )

        food_data_str = completion.choices[0].message.content
        logging.info(f"LLM response: {food_data_str}")

        food_data = json.loads(food_data_str)
        logging.info("Successfully parsed LLM response.")

        # Debug logs for serving size: trace actual serving_size_g from LLM, no rescaling applied
        serving_size_g = food_data.get('serving_size_g')
        logging.info(f"Extracted serving_size_g: {serving_size_g}g (actual serving size, no normalization to 100g)")

        return FoodItem(**food_data)
    except Exception as e:
        logging.error(f"Error during LLM data extraction: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Error extracting food data: {e}") from e

View File

@@ -1,3 +1,24 @@
from functools import lru_cache
from typing import Optional

from fastapi.templating import Jinja2Templates
from pydantic_settings import BaseSettings, SettingsConfigDict

# Shared Jinja2 template renderer, created once; templates are looked up in
# the "templates" directory (relative path).
templates = Jinja2Templates(directory="templates")
class Settings(BaseSettings):
    """
    Application settings.

    Settings are loaded from environment variables and from the ``.env``
    file named in ``model_config``; extra keys found there are ignored.
    No field declares a default value, so each one has to be supplied.
    """
    # env_file: also read values from ".env"; extra="ignore": tolerate
    # variables that do not correspond to a declared field.
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")
    # NOTE(review): presumably the database connection URL; its use is not
    # visible in this file.
    DATABASE_URL: str
    # NOTE(review): SECRET_KEY / ALGORITHM / ACCESS_TOKEN_EXPIRE_MINUTES look
    # like token-signing settings; confirm against the code that reads them.
    SECRET_KEY: str
    ALGORITHM: str
    ACCESS_TOKEN_EXPIRE_MINUTES: int
@lru_cache(maxsize=128)
def get_settings():
    """Return the cached ``Settings`` instance, building it on the first call."""
    settings = Settings()
    return settings

View File

@@ -35,6 +35,9 @@ engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False} i
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
# Import all models to ensure they are registered with Base
from app.models.llm_config import LLMConfig
# Database Models
class Food(Base):
__tablename__ = "foods"

10
app/models/llm_config.py Normal file
View File

@@ -0,0 +1,10 @@
from sqlalchemy import Column, Integer, String
from app.database import Base
class LLMConfig(Base):
    """ORM model for the ``llm_configs`` table holding LLM integration settings."""
    # NOTE(review): the routes visible in this change only ever read the
    # first row (`.first()`), i.e. the table appears to be used as a
    # single-row store; confirm no other code relies on multiple rows.
    __tablename__ = "llm_configs"
    # Integer primary key, indexed.
    id = Column(Integer, primary_key=True, index=True)
    # OpenRouter API key; nullable. Stored as a plain String column.
    openrouter_api_key = Column(String, nullable=True)
    # Model identifier sent to the LLM API; defaults to
    # "anthropic/claude-3.5-sonnet" when no value is given.
    preferred_model = Column(String, default="anthropic/claude-3.5-sonnet")
    # Browserless API key; nullable. Stored as a plain String column.
    browserless_api_key = Column(String, nullable=True)