diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..a86cd18 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,56 @@ +# Build artifacts +**/bin/ +**/obj/ +**/publish/ +**/.vs/ +**/.vscode/ + +# Git +.git/ +.gitignore +.gitattributes +.github/ + +# Documentation +*.md +!src/**/*.md +docs/ + +# Database files +**/*.sqlite +**/*.sqlite-shm +**/*.sqlite-wal +**/*.mdf +**/*.ldf + +# Node/Frontend +node_modules/ +**/node_modules/ + +# IDE +.vs/ +.vscode/ +*.suo +*.user +*.userosscache +*.sln.docstates + +# Tests +**/Tests/ + +# Docker +Dockerfile* +docker-compose*.yml +.dockerignore +.env +.env.* + +# CI/CD +.github/ + +# Claude +.claude/ + +# Misc +*.log +*.tmp diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..dac52ad --- /dev/null +++ b/.env.example @@ -0,0 +1,64 @@ +# =================================================================== +# Purdue.io Docker Configuration +# =================================================================== +# Copy this file to .env and update with your configuration +# +# IMPORTANT: Never commit .env file to version control! 
+# =================================================================== + +# ------------------------------------------------------------------- +# PostgreSQL Database Configuration +# ------------------------------------------------------------------- +POSTGRES_DB=purdueio +POSTGRES_USER=purdueio +POSTGRES_PASSWORD=changeme_in_production + +# ------------------------------------------------------------------- +# API Configuration +# ------------------------------------------------------------------- +# Port to expose the API on the host machine +API_PORT=8080 + +# ASP.NET Core environment (Development or Production) +ASPNETCORE_ENVIRONMENT=Production + +# ------------------------------------------------------------------- +# CatalogSync Configuration +# ------------------------------------------------------------------- + +# Sync Schedule (cron expression) +# Determines when the catalog sync runs automatically +# +# Default: 0 2 * * * (Daily at 2:00 AM) +# +# Common Examples: +# - "0 2 * * *" = Daily at 2:00 AM (DEFAULT) +# - "0 */6 * * *" = Every 6 hours +# - "0 */2 * * *" = Every 2 hours +# - "*/30 * * * *" = Every 30 minutes +# - "0 0 * * 0" = Weekly on Sunday at midnight +# +SYNC_SCHEDULE=0 2 * * * + +# Terms to Sync (optional) +# Comma-separated list of term codes to sync +# Example: SYNC_TERMS=202410,202510,202520 +# Leave empty to sync all available terms based on SYNC_ALL_TERMS setting +SYNC_TERMS= + +# Subjects to Sync (optional) +# Comma-separated list of subject codes to sync +# Example: SYNC_SUBJECTS=CS,MA,PHYS,ECE,ENGL +# Leave empty to sync all subjects +SYNC_SUBJECTS= + +# Sync All Terms +# Controls whether to sync all historical terms or only current/future terms +# - false (default): Only sync current and future terms +# - true: Sync all available terms including historical data +SYNC_ALL_TERMS=false + +# Run Once Mode (for testing/initialization) +# Set to "true" to run the sync once and exit (useful for testing) +# Set to "false" for continuous 
cron scheduling (production mode) +RUN_ONCE=false diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml new file mode 100644 index 0000000..359efdc --- /dev/null +++ b/.github/workflows/docker-publish.yml @@ -0,0 +1,114 @@ +name: Docker Build and Publish + +on: + pull_request: + branches: [ main ] + paths: + - 'src/**' + - 'Dockerfile.*' + - 'docker/**' + - '.github/workflows/docker-publish.yml' + release: + types: [published] + workflow_dispatch: + inputs: + tag: + description: 'Tag to build and push (e.g., v1.0.0 or latest)' + required: true + default: 'latest' + +env: + REGISTRY: ghcr.io + IMAGE_NAME_API: ${{ github.repository }}/api + IMAGE_NAME_CATALOGSYNC: ${{ github.repository }}/catalogsync + +jobs: + # Verify builds on PRs (no push) + verify-build: + name: Verify Docker Builds + runs-on: ubuntu-latest + if: github.event_name == 'pull_request' + + strategy: + matrix: + service: [api, catalogsync] + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build ${{ matrix.service }} (verification only) + uses: docker/build-push-action@v5 + with: + context: . 
+ file: ./Dockerfile.${{ matrix.service }} + push: false + tags: purdueio-${{ matrix.service }}:test + cache-from: type=gha + cache-to: type=gha,mode=max + + # Build and push on releases + build-and-push: + name: Build and Push Docker Images + runs-on: ubuntu-latest + if: github.event_name == 'release' || github.event_name == 'workflow_dispatch' + permissions: + contents: read + packages: write + + strategy: + matrix: + include: + - service: api + image_name_var: IMAGE_NAME_API + - service: catalogsync + image_name_var: IMAGE_NAME_CATALOGSYNC + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ matrix.service == 'api' && env.IMAGE_NAME_API || env.IMAGE_NAME_CATALOGSYNC }} + tags: | + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=raw,value=latest,enable={{is_default_branch}} + type=raw,value=${{ github.event.inputs.tag }},enable=${{ github.event_name == 'workflow_dispatch' }} + + - name: Build and push ${{ matrix.service }} + uses: docker/build-push-action@v5 + with: + context: . 
+ file: ./Dockerfile.${{ matrix.service }} + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + + - name: Output image tags + run: | + echo "### Published ${{ matrix.service }} image :rocket:" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "**Tags:**" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + echo "${{ steps.meta.outputs.tags }}" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY diff --git a/.gitignore b/.gitignore index 8696095..397ac92 100644 --- a/.gitignore +++ b/.gitignore @@ -194,3 +194,13 @@ $RECYCLE.BIN/ .DS_Store *.mdf *.ldf + +# ========================= +# Docker +# ========================= + +# Environment files (contain secrets) +.env + +# Docker temporary files +.docker/
diff --git a/Dockerfile.api b/Dockerfile.api
new file mode 100644
index 0000000..415b351
--- /dev/null
+++ b/Dockerfile.api
@@ -0,0 +1,46 @@
+# Stage 1: Build
+FROM mcr.microsoft.com/dotnet/sdk:9.0 AS build
+WORKDIR /src
+
+# Copy only the project files needed for API (and its dependencies)
+COPY src/Api/Api.csproj ./Api/
+COPY src/Database/Database.csproj ./Database/
+COPY src/Database.Migrations.Sqlite/Database.Migrations.Sqlite.csproj ./Database.Migrations.Sqlite/
+COPY src/Database.Migrations.Npgsql/Database.Migrations.Npgsql.csproj ./Database.Migrations.Npgsql/
+
+# Restore dependencies for API project only (not entire solution)
+RUN dotnet restore Api/Api.csproj
+
+# Copy all source files
+COPY src/ ./
+
+# Publish the API project
+RUN dotnet publish Api/Api.csproj -c Release -r linux-x64 \
+    --self-contained=true \
+    -p:PublishReadyToRun=true \
+    -o /app/publish
+
+# Stage 2: Runtime
+FROM mcr.microsoft.com/dotnet/aspnet:9.0
+WORKDIR /app
+
+# The compose healthchecks probe the API with curl, which the aspnet runtime
+# image does not ship; install it so the container can report healthy.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy published application (includes wwwroot if present)
+COPY --from=build /app/publish .
+
+# Expose port 8080 (HTTP only for internal Docker network)
+EXPOSE 8080
+
+# Configure ASP.NET Core to listen on port 8080
+ENV ASPNETCORE_URLS=http://+:8080 \
+    ASPNETCORE_ENVIRONMENT=Production
+
+# Run as non-root user for security
+USER app
+
+ENTRYPOINT ["./Api"]
diff --git a/Dockerfile.catalogsync b/Dockerfile.catalogsync new file mode 100644 index 0000000..3d2bc8b --- /dev/null +++ b/Dockerfile.catalogsync @@ -0,0 +1,51 @@ +# Stage 1: Build +FROM mcr.microsoft.com/dotnet/sdk:9.0 AS build +WORKDIR /src + +# Copy only the project files needed for CatalogSync (and its dependencies) +COPY src/CatalogSync/CatalogSync.csproj ./CatalogSync/ +COPY src/Database/Database.csproj ./Database/ +COPY src/Scraper/Scraper.csproj ./Scraper/ +COPY src/Database.Migrations.Sqlite/Database.Migrations.Sqlite.csproj ./Database.Migrations.Sqlite/ +COPY src/Database.Migrations.Npgsql/Database.Migrations.Npgsql.csproj ./Database.Migrations.Npgsql/ + +# Restore dependencies for CatalogSync project only (not entire solution) +RUN dotnet restore CatalogSync/CatalogSync.csproj + +# Copy all source files +COPY src/ ./ + +# Publish the CatalogSync project +RUN dotnet publish CatalogSync/CatalogSync.csproj -c Release -r linux-x64 \ + --self-contained=true \ + -p:PublishReadyToRun=true \ + -o /app/publish + +# Stage 2: Runtime with supercronic +FROM mcr.microsoft.com/dotnet/aspnet:9.0 +WORKDIR /app + +# Install supercronic for Docker-friendly cron scheduling +ENV SUPERCRONIC_URL=https://github.com/aptible/supercronic/releases/download/v0.2.29/supercronic-linux-amd64 \ + SUPERCRONIC=supercronic-linux-amd64 \ + SUPERCRONIC_SHA1SUM=cd48d45c4b10f3f0bfdd3a57d054cd05ac96812b + +RUN apt-get update && apt-get install -y --no-install-recommends curl \ + && curl -fsSLO "$SUPERCRONIC_URL" \ + && echo "${SUPERCRONIC_SHA1SUM} ${SUPERCRONIC}" | sha1sum -c - \ + && chmod +x "$SUPERCRONIC" \ + && mv "$SUPERCRONIC" /usr/local/bin/supercronic \ + && apt-get purge -y --auto-remove curl \ + && rm -rf 
/var/lib/apt/lists/* + +# Copy published application +COPY --from=build /app/publish . + +# Copy entrypoint script and ensure it's executable by all users +COPY docker/catalogsync-entrypoint.sh /app/entrypoint.sh +RUN chmod 755 /app/entrypoint.sh + +# Run as non-root user for security +USER app + +ENTRYPOINT ["/app/entrypoint.sh"] diff --git a/README.md b/README.md index 615047e..c840fe0 100644 --- a/README.md +++ b/README.md @@ -52,9 +52,47 @@ there through the query tester at [http://api.purdue.io/](api.purdue.io/). # Building and Running -## Tools +## Docker (Recommended) -Purdue.io is written in C# on .NET 8. It will run natively on most major +The easiest way to run Purdue.io is using Docker. This will start all services including PostgreSQL, the API, and the CatalogSync scheduler. + +### Prerequisites + +- [Docker](https://docs.docker.com/get-docker/) +- [Docker Compose](https://docs.docker.com/compose/install/) + +### Quick Start (Using Pre-built Images) + +```sh +# 1. Download the production docker-compose file +curl -O https://raw.githubusercontent.com/Purdue-io/PurdueApi/main/docker-compose.production.yml +curl -O https://raw.githubusercontent.com/Purdue-io/PurdueApi/main/.env.example + +# 2. Copy the example environment file +cp .env.example .env + +# 3. (Optional) Edit .env to configure sync schedule, database credentials, etc. + +# 4. Pull and start all services +docker compose -f docker-compose.production.yml up -d + +# 5. Watch the logs +docker compose -f docker-compose.production.yml logs -f + +# 6. Access the API at http://localhost:8080/odata +``` + +**Pre-built images are available at:** +- `ghcr.io/purdue-io/purdueapi/api:latest` +- `ghcr.io/purdue-io/purdueapi/catalogsync:latest` + +For building from source, detailed configuration options, and troubleshooting, see [docs/DOCKER.md](docs/DOCKER.md). + +## Local Development + +### Tools + +Purdue.io is written in C# on .NET 9. 
It will run natively on most major architectures and operating systems (Windows, Linux, Mac OS). Entity Framework is used to communicate with an underlying database provider. Currently, diff --git a/docker-compose.production.yml b/docker-compose.production.yml new file mode 100644 index 0000000..5787e44 --- /dev/null +++ b/docker-compose.production.yml @@ -0,0 +1,71 @@ +version: '3.8' + +# Production docker-compose file that pulls pre-built images from GitHub Container Registry +# Usage: docker compose -f docker-compose.production.yml up -d + +services: + postgres: + image: postgres:16-alpine + container_name: purdueio-postgres + restart: unless-stopped + environment: + POSTGRES_DB: ${POSTGRES_DB:-purdueio} + POSTGRES_USER: ${POSTGRES_USER:-purdueio} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-purdueio} + volumes: + - postgres-data:/var/lib/postgresql/data + networks: + - purdueio-net + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-purdueio}"] + interval: 10s + timeout: 5s + retries: 5 + + api: + image: ghcr.io/purdue-io/purdueapi/api:latest + container_name: purdueio-api + restart: unless-stopped + ports: + - "${API_PORT:-8080}:8080" + environment: + DbProvider: Npgsql + DbConnectionString: "Host=postgres;Port=5432;Database=${POSTGRES_DB:-purdueio};Username=${POSTGRES_USER:-purdueio};Password=${POSTGRES_PASSWORD:-purdueio}" + ASPNETCORE_URLS: http://+:8080 + ASPNETCORE_ENVIRONMENT: ${ASPNETCORE_ENVIRONMENT:-Production} + depends_on: + postgres: + condition: service_healthy + networks: + - purdueio-net + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:8080/odata/Terms || exit 1"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + + catalogsync: + image: ghcr.io/purdue-io/purdueapi/catalogsync:latest + container_name: purdueio-catalogsync + restart: unless-stopped + environment: + DB_PROVIDER: Npgsql + DB_CONNECTION_STRING: 
"Host=postgres;Port=5432;Database=${POSTGRES_DB:-purdueio};Username=${POSTGRES_USER:-purdueio};Password=${POSTGRES_PASSWORD:-purdueio}" + SYNC_SCHEDULE: ${SYNC_SCHEDULE:-0 2 * * *} + SYNC_TERMS: ${SYNC_TERMS:-} + SYNC_SUBJECTS: ${SYNC_SUBJECTS:-} + SYNC_ALL_TERMS: ${SYNC_ALL_TERMS:-false} + RUN_ONCE: ${RUN_ONCE:-false} + depends_on: + postgres: + condition: service_healthy + networks: + - purdueio-net + +networks: + purdueio-net: + driver: bridge + +volumes: + postgres-data: diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..d62994d --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,87 @@ +version: '3.8' + +services: + postgres: + image: postgres:16-alpine + container_name: purdueio-postgres + restart: unless-stopped + environment: + POSTGRES_DB: ${POSTGRES_DB:-purdueio} + POSTGRES_USER: ${POSTGRES_USER:-purdueio} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-purdueio} + volumes: + - postgres-data:/var/lib/postgresql/data + networks: + - purdueio-net + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-purdueio}"] + interval: 10s + timeout: 5s + retries: 5 + + api: + build: + context: . + dockerfile: Dockerfile.api + container_name: purdueio-api + restart: unless-stopped + ports: + - "${API_PORT:-8080}:8080" + environment: + DbProvider: Npgsql + DbConnectionString: "Host=postgres;Port=5432;Database=${POSTGRES_DB:-purdueio};Username=${POSTGRES_USER:-purdueio};Password=${POSTGRES_PASSWORD:-purdueio}" + ASPNETCORE_URLS: http://+:8080 + ASPNETCORE_ENVIRONMENT: ${ASPNETCORE_ENVIRONMENT:-Production} + depends_on: + postgres: + condition: service_healthy + networks: + - purdueio-net + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:8080/odata/Terms || exit 1"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + + catalogsync: + build: + context: . 
+ dockerfile: Dockerfile.catalogsync + container_name: purdueio-catalogsync + restart: unless-stopped + environment: + DB_PROVIDER: Npgsql + DB_CONNECTION_STRING: "Host=postgres;Port=5432;Database=${POSTGRES_DB:-purdueio};Username=${POSTGRES_USER:-purdueio};Password=${POSTGRES_PASSWORD:-purdueio}" + SYNC_SCHEDULE: ${SYNC_SCHEDULE:-0 2 * * *} + SYNC_TERMS: ${SYNC_TERMS:-} + SYNC_SUBJECTS: ${SYNC_SUBJECTS:-} + SYNC_ALL_TERMS: ${SYNC_ALL_TERMS:-false} + RUN_ONCE: ${RUN_ONCE:-false} + depends_on: + postgres: + condition: service_healthy + networks: + - purdueio-net + + # Optional: Frontend service reference (external repo) + # Uncomment and configure when frontend is containerized + # frontend: + # image: purdueio/frontend:latest + # container_name: purdueio-frontend + # restart: unless-stopped + # ports: + # - "3000:80" + # environment: + # API_URL: http://api:8080 + # depends_on: + # - api + # networks: + # - purdueio-net + +networks: + purdueio-net: + driver: bridge + +volumes: + postgres-data:
diff --git a/docker/catalogsync-entrypoint.sh b/docker/catalogsync-entrypoint.sh
new file mode 100644
index 0000000..4d2e4d5
--- /dev/null
+++ b/docker/catalogsync-entrypoint.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+# Entrypoint for the CatalogSync container.
+#
+# Builds the CatalogSync command line from environment variables, then either
+# runs a single sync (RUN_ONCE=true) or starts supercronic with a generated
+# crontab that runs the sync on the SYNC_SCHEDULE cron expression.
+#
+# Environment variables:
+#   SYNC_SCHEDULE         cron expression for scheduled syncs (default: 0 2 * * *)
+#   DB_PROVIDER           passed to CatalogSync as -d (default: Npgsql)
+#   DB_CONNECTION_STRING  passed to CatalogSync as -c
+#   SYNC_ALL_TERMS        "true" adds -a (default: false)
+#   SYNC_TERMS            when non-empty, passed as -t
+#   SYNC_SUBJECTS         when non-empty, passed as -s
+#   RUN_ONCE              "true" runs one sync and exits
+#   CRONTAB_FILE          optional path for the generated crontab
+set -e
+
+# Default values
+SYNC_SCHEDULE="${SYNC_SCHEDULE:-0 2 * * *}"
+DB_PROVIDER="${DB_PROVIDER:-Npgsql}"
+DB_CONNECTION_STRING="${DB_CONNECTION_STRING:-Host=postgres;Port=5432;Database=purdueio;Username=purdueio;Password=purdueio}"
+SYNC_ALL_TERMS="${SYNC_ALL_TERMS:-false}"
+
+# Build the arguments as an array so values containing spaces, quotes or shell
+# metacharacters (for example a generated password) reach CatalogSync intact.
+ARGS=(-d "${DB_PROVIDER}" -c "${DB_CONNECTION_STRING}")
+# Printable summary of the same options; the connection string is masked so
+# database credentials do not end up in the container logs.
+LOG_ARGS="-d ${DB_PROVIDER} -c ***"
+
+if [ "${SYNC_ALL_TERMS}" = "true" ]; then
+  ARGS+=(-a)
+  LOG_ARGS+=" -a"
+fi
+
+if [ -n "${SYNC_TERMS}" ]; then
+  ARGS+=(-t "${SYNC_TERMS}")
+  LOG_ARGS+=" -t ${SYNC_TERMS}"
+fi
+
+if [ -n "${SYNC_SUBJECTS}" ]; then
+  ARGS+=(-s "${SYNC_SUBJECTS}")
+  LOG_ARGS+=" -s ${SYNC_SUBJECTS}"
+fi
+
+# One-off sync mode (for testing/initialization)
+if [ "${RUN_ONCE}" = "true" ]; then
+  echo "Running one-time sync..."
+  echo "Command: ./CatalogSync ${LOG_ARGS}"
+  # exec replaces this shell, so the exit status is CatalogSync's own.
+  exec ./CatalogSync "${ARGS[@]}"
+fi
+
+# Cron mode: generate a crontab and run supercronic.
+# /app is only writable when the container user owns it; otherwise fall back to
+# /tmp so the script also works when running as a non-root user.
+if [ -z "${CRONTAB_FILE}" ]; then
+  if [ -w /app ]; then
+    CRONTAB_FILE=/app/crontab
+  else
+    CRONTAB_FILE=/tmp/crontab
+  fi
+fi
+
+echo "Setting up scheduled sync with cron expression: ${SYNC_SCHEDULE}"
+echo "CatalogSync will run with arguments: ${LOG_ARGS}"
+
+# The crontab embeds the connection string, so create it readable only by the
+# current user. printf %q shell-quotes every argument for the job command line.
+(
+  umask 077
+  {
+    printf '%s cd /app && ./CatalogSync' "${SYNC_SCHEDULE}"
+    printf ' %q' "${ARGS[@]}"
+    printf '\n'
+  } > "${CRONTAB_FILE}"
+)
+
+echo "Crontab written to ${CRONTAB_FILE}"
+echo "Starting supercronic..."
+exec supercronic "${CRONTAB_FILE}"
diff --git a/docs/DOCKER.md b/docs/DOCKER.md new file mode 100644 index 0000000..fe8ca11 --- /dev/null +++ b/docs/DOCKER.md @@ -0,0 +1,643 @@ +# Docker Deployment Guide for Purdue.io + +This guide provides comprehensive instructions for deploying Purdue.io using Docker and Docker Compose. + +## Table of Contents + +- [Overview](#overview) +- [Prerequisites](#prerequisites) +- [Quick Start](#quick-start) +- [Architecture](#architecture) +- [Configuration](#configuration) +- [Usage Examples](#usage-examples) +- [Production Deployment](#production-deployment) +- [Troubleshooting](#troubleshooting) +- [Maintenance](#maintenance) + +## Overview + +The Purdue.io Docker deployment consists of three main services: + +1. **PostgreSQL** - Database for storing course catalog data +2. **API** - ASP.NET Core OData web service +3. **CatalogSync** - Scheduled sync process using supercronic + +All services are orchestrated using Docker Compose and communicate over a private network. + +## Prerequisites + +- **Docker** 20.10+ ([Install Docker](https://docs.docker.com/get-docker/)) +- **Docker Compose** 2.0+ ([Install Docker Compose](https://docs.docker.com/compose/install/)) +- At least 2GB of available disk space +- At least 1GB of available RAM + +## Quick Start + +### Option 1: Using Pre-built Images (Recommended for Production) + +Use published images from GitHub Container Registry - no build required! 
+ +```bash +# Download the production docker-compose file +curl -O https://raw.githubusercontent.com/Purdue-io/PurdueApi/main/docker-compose.production.yml +curl -O https://raw.githubusercontent.com/Purdue-io/PurdueApi/main/.env.example + +# Copy the example environment file +cp .env.example .env + +# (Optional) Edit .env to customize configuration +nano .env + +# Pull and start all services +docker compose -f docker-compose.production.yml up -d + +# View logs +docker compose -f docker-compose.production.yml logs -f + +# Stop services +docker compose -f docker-compose.production.yml down +``` + +### Option 2: Building from Source (Development) + +Build images locally from source code: + +```bash +# Clone the repository +git clone https://github.com/Purdue-io/PurdueApi.git +cd PurdueApi + +# Copy the example environment file +cp .env.example .env + +# (Optional) Edit .env to customize configuration +nano .env + +# Build and start all services +docker compose up -d --build + +# View logs +docker compose logs -f + +# Stop services +docker compose down + +# Stop and remove volumes (deletes database data) +docker compose down -v +``` + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Host Machine │ +│ │ +│ ┌──────────────────────────────────────────────────────┐ │ +│ │ Docker Network (purdueio-net) │ │ +│ │ │ │ +│ │ ┌──────────────┐ ┌──────────────┐ │ │ +│ │ │ PostgreSQL │ │ API │ │ │ +│ │ │ :5432 │◄──┤ :8080 │ │ │ +│ │ └──────┬───────┘ └──────────────┘ │ │ +│ │ │ │ │ +│ │ │ ┌──────────────┐ │ │ +│ │ └───────────► CatalogSync │ │ │ +│ │ │ (supercronic)│ │ │ +│ │ └──────────────┘ │ │ +│ └──────────────────────────────────────────────────────┘ │ +│ │ +│ Port Mapping: localhost:8080 → api:8080 │ +└─────────────────────────────────────────────────────────────┘ +``` + +### Service Details + +#### PostgreSQL +- **Image**: `postgres:16-alpine` +- **Purpose**: Persistent database storage +- **Volume**: `postgres-data` (persists data 
across container restarts) +- **Health Check**: Runs `pg_isready` every 10 seconds + +#### API +- **Build**: Custom Dockerfile (Dockerfile.api) +- **Framework**: ASP.NET Core 9.0 +- **Port**: Exposed on host port 8080 (configurable via API_PORT) +- **Dependencies**: Waits for PostgreSQL to be healthy +- **Auto-migrations**: Runs EF Core migrations on startup + +#### CatalogSync +- **Build**: Custom Dockerfile (Dockerfile.catalogsync) +- **Scheduler**: Supercronic (Docker-friendly cron) +- **Default Schedule**: Daily at 2:00 AM +- **Dependencies**: Waits for PostgreSQL to be healthy + +## Configuration + +All configuration is done through environment variables in the `.env` file. + +### Environment Variables Reference + +#### PostgreSQL Configuration + +| Variable | Default | Description | +|----------|---------|-------------| +| `POSTGRES_DB` | `purdueio` | Database name | +| `POSTGRES_USER` | `purdueio` | Database username | +| `POSTGRES_PASSWORD` | `purdueio` | Database password | + +**Security Note**: Change the default password in production! 
+#### API Configuration + +| Variable | Default | Description | +|----------|---------|-------------| +| `API_PORT` | `8080` | Host port to expose the API on | +| `ASPNETCORE_ENVIRONMENT` | `Production` | Environment (Development/Production) | + +#### CatalogSync Configuration + +| Variable | Default | Description | +|----------|---------|-------------| +| `SYNC_SCHEDULE` | `0 2 * * *` | Cron expression for sync schedule | +| `SYNC_TERMS` | _(empty)_ | Comma-separated term codes to sync | +| `SYNC_SUBJECTS` | _(empty)_ | Comma-separated subject codes to sync | +| `SYNC_ALL_TERMS` | `false` | Sync all terms vs current/future only | +| `RUN_ONCE` | `false` | Run once and exit (for testing) | + +### Cron Schedule Examples + +The `SYNC_SCHEDULE` variable uses standard cron syntax: + +``` +┌───────────── minute (0 - 59) +│ ┌───────────── hour (0 - 23) +│ │ ┌───────────── day of month (1 - 31) +│ │ │ ┌───────────── month (1 - 12) +│ │ │ │ ┌───────────── day of week (0 - 6) (Sunday = 0) +│ │ │ │ │ +│ │ │ │ │ +* * * * * +``` + +Common examples: +- `0 2 * * *` - Daily at 2:00 AM (DEFAULT) +- `0 */6 * * *` - Every 6 hours +- `0 */2 * * *` - Every 2 hours +- `*/30 * * * *` - Every 30 minutes +- `0 0 * * 0` - Weekly on Sunday at midnight +- `0 3 1 * *` - Monthly on the 1st at 3:00 AM + +## Usage Examples + +### Example 1: Development Setup + +For development with frequent syncs: + +```bash +# .env configuration +POSTGRES_PASSWORD=devpassword +API_PORT=8080 +SYNC_SCHEDULE=0 */2 * * * # Every 2 hours +SYNC_ALL_TERMS=false + +# Start services +docker-compose up -d +``` + +### Example 2: Production Setup with Specific Terms + +For production syncing only specific terms and subjects: + +```bash +# .env configuration +POSTGRES_DB=purdueio_prod +POSTGRES_USER=purdueio_prod +# Must not be left empty: an empty value falls back to the default password +POSTGRES_PASSWORD=replace-with-a-strong-password +API_PORT=8080 +SYNC_SCHEDULE=0 2 * * * # Daily at 2 AM +SYNC_TERMS=202510,202520 +SYNC_SUBJECTS=CS,MA,ECE,PHYS +SYNC_ALL_TERMS=false + +# Start services +docker-compose up -d +``` + +### 
Example 3: Initial Database Population + +Run a one-time sync to populate the database: + +```bash +# First, start PostgreSQL and API +docker-compose up -d postgres api + +# Wait for services to be healthy +docker-compose ps + +# Run one-time sync +docker-compose run --rm -e RUN_ONCE=true catalogsync + +# If successful, start the scheduled sync +docker-compose up -d catalogsync +``` + +### Example 4: API-Only Deployment + +If you want to sync externally and only run the API: + +```bash +# Start only PostgreSQL and API +docker-compose up -d postgres api + +# Verify API is running +curl http://localhost:8080/odata/Terms +``` + +## Production Deployment + +### Security Hardening + +1. **Use Strong Passwords** + ```bash + # Generate a strong password + openssl rand -base64 32 + + # Update .env with the generated value + POSTGRES_PASSWORD=paste-the-generated-password-here + ``` + +2. **Use Docker Secrets** (Docker Swarm) + ```yaml + # docker-compose.yml modifications + secrets: + postgres_password: + external: true + + services: + postgres: + secrets: + - postgres_password + environment: + POSTGRES_PASSWORD_FILE: /run/secrets/postgres_password + ``` + +3. **Run Behind a Reverse Proxy** + + Use nginx or Traefik to handle HTTPS/TLS termination: + + ```nginx + server { + listen 443 ssl; + server_name api.purdue.io; + + ssl_certificate /path/to/cert.pem; + ssl_certificate_key /path/to/key.pem; + + location / { + proxy_pass http://localhost:8080; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + } + ``` + +4. **Network Segmentation** + + Keep the database on an internal network, not exposed to the internet. 
+ +### Resource Limits + +Add resource limits to prevent services from consuming excessive resources: + +```yaml +# docker-compose.yml modifications +services: + postgres: + deploy: + resources: + limits: + cpus: '1' + memory: 1G + reservations: + cpus: '0.5' + memory: 512M + + api: + deploy: + resources: + limits: + cpus: '2' + memory: 512M + reservations: + cpus: '1' + memory: 256M +``` + +### Scaling the API + +Scale the API service horizontally for high availability: + +```bash +# Start 3 API instances +docker-compose up -d --scale api=3 + +# Use a load balancer (nginx, HAProxy, traefik) to distribute traffic +``` + +**Important**: Only run ONE CatalogSync instance to avoid concurrent sync conflicts. + +### Monitoring + +1. **Health Checks** + ```bash + # Check service health + docker-compose ps + + # Inspect health check logs + docker inspect purdueio-api | grep -A 10 Health + ``` + +2. **Application Logs** + ```bash + # View all logs + docker-compose logs + + # Follow API logs + docker-compose logs -f api + + # View last 100 lines + docker-compose logs --tail=100 catalogsync + ``` + +3. **Resource Usage** + ```bash + # Monitor resource usage + docker stats + ``` + +4. 
**Centralized Logging** + + Configure Docker logging drivers for centralized log management: + ```yaml + # docker-compose.yml + x-logging: &default-logging + driver: json-file + options: + max-size: "10m" + max-file: "3" + + services: + api: + logging: *default-logging + ``` + +## Troubleshooting + +### Services Won't Start + +**Symptom**: `docker-compose up` fails or services are unhealthy + +**Solutions**: +```bash +# Check service status +docker-compose ps + +# View logs for errors +docker-compose logs + +# Check disk space +df -h + +# Check if ports are already in use +netstat -tuln | grep 8080 +netstat -tuln | grep 5432 +``` + +### API Cannot Connect to Database + +**Symptom**: API logs show database connection errors + +**Solutions**: +```bash +# Verify PostgreSQL is healthy +docker-compose ps postgres + +# Check PostgreSQL logs +docker-compose logs postgres + +# Verify the postgres hostname resolves from the API container +# (the runtime image does not include ping) +docker-compose exec api getent hosts postgres + +# Ensure connection string matches .env settings +docker-compose config | grep DbConnectionString +``` + +### Migrations Fail + +**Symptom**: API fails to start due to migration errors + +**Solutions**: +```bash +# View API logs +docker-compose logs api + +# Reset database (WARNING: Deletes all data) +docker-compose down -v +docker-compose up -d + +# Retry migrations (if needed): the API applies them on startup, and the +# runtime image has no .NET SDK or dotnet-ef tool, so restart the API instead +docker-compose restart api +``` + +### CatalogSync Not Running + +**Symptom**: Sync doesn't execute on schedule + +**Solutions**: +```bash +# Check CatalogSync logs +docker-compose logs catalogsync + +# Verify cron schedule is valid +docker-compose exec catalogsync cat /app/crontab + +# Test one-time sync +docker-compose run --rm -e RUN_ONCE=true catalogsync + +# Verify supercronic is running (the image does not include ps) +docker-compose top catalogsync +``` + +### Invalid Cron Expression + +**Symptom**: CatalogSync exits immediately + +**Solutions**: +```bash +# Check logs for validation errors +docker-compose logs catalogsync + +# 
Verify cron syntax using https://crontab.guru/ +# Fix SYNC_SCHEDULE in .env and restart +docker-compose restart catalogsync +``` + +### Out of Disk Space + +**Symptom**: Services fail with disk space errors + +**Solutions**: +```bash +# Check disk usage +docker system df + +# Remove unused images +docker image prune -a + +# Remove unused volumes +docker volume prune + +# Remove unused networks +docker network prune +``` + +### Performance Issues + +**Symptom**: API is slow or unresponsive + +**Solutions**: +```bash +# Check resource usage +docker stats + +# Check PostgreSQL query performance +docker-compose exec postgres psql -U purdueio -c "SELECT * FROM pg_stat_activity;" + +# Add database indexes (if needed) +# Check API logs for slow queries + +# Scale API horizontally +docker-compose up -d --scale api=3 +``` + +## Maintenance + +### Backing Up the Database + +```bash +# Backup database to file +docker-compose exec postgres pg_dump -U purdueio purdueio > backup-$(date +%Y%m%d).sql + +# Automated daily backups (crontab) +0 3 * * * cd /path/to/PurdueApi && docker-compose exec -T postgres pg_dump -U purdueio purdueio | gzip > /backups/purdueio-$(date +\%Y\%m\%d).sql.gz +``` + +### Restoring from Backup + +```bash +# Stop API and CatalogSync +docker-compose stop api catalogsync + +# Restore database +cat backup-20240115.sql | docker-compose exec -T postgres psql -U purdueio purdueio + +# Restart services +docker-compose start api catalogsync +``` + +### Updating Images + +```bash +# Pull latest images +docker-compose pull + +# Rebuild custom images +docker-compose build --no-cache + +# Restart with new images +docker-compose up -d +``` + +### Viewing Database Data + +```bash +# Connect to PostgreSQL +docker-compose exec postgres psql -U purdueio + +# Useful queries +\dt # List tables +SELECT COUNT(*) FROM "Courses"; # Count courses +SELECT COUNT(*) FROM "Sections"; # Count sections +SELECT * FROM "Terms"; # List terms +\q # Quit +``` + +### Cleanup + +```bash +# 
Stop and remove containers +docker-compose down + +# Stop and remove containers + volumes (deletes database data) +docker-compose down -v + +# Remove all images +docker-compose down --rmi all + +# Full cleanup (containers, volumes, images) +docker-compose down -v --rmi all --remove-orphans +``` + +## Published Container Images + +Pre-built Docker images are automatically published to GitHub Container Registry on every release: + +- **API**: `ghcr.io/purdue-io/purdueapi/api:latest` +- **CatalogSync**: `ghcr.io/purdue-io/purdueapi/catalogsync:latest` + +### Available Tags + +Image tags are derived from the release tag with the leading `v` removed (release `v1.0.0` is published as `1.0.0`): + +- `latest` - Most recent release from the main branch +- `1.0.0` - Specific semantic version (e.g., 1.0.0, 1.1.0) +- `1` - Major version tag (e.g., 1, 2) +- `1.0` - Major.minor version tag (e.g., 1.0, 1.1) + +### Pulling Specific Versions + +```bash +# Pull latest version +docker pull ghcr.io/purdue-io/purdueapi/api:latest +docker pull ghcr.io/purdue-io/purdueapi/catalogsync:latest + +# Pull specific version +docker pull ghcr.io/purdue-io/purdueapi/api:1.0.0 +docker pull ghcr.io/purdue-io/purdueapi/catalogsync:1.0.0 + +# Use in docker-compose.production.yml +# Just change the image tag: +# image: ghcr.io/purdue-io/purdueapi/api:1.0.0 +``` + +### Authentication + +Images are public and don't require authentication to pull. 
However, for private repositories, you may need to authenticate: + +```bash +echo $GITHUB_TOKEN | docker login ghcr.io -u USERNAME --password-stdin +``` + +## Additional Resources + +- [Docker Documentation](https://docs.docker.com/) +- [Docker Compose Documentation](https://docs.docker.com/compose/) +- [PostgreSQL Docker Image](https://hub.docker.com/_/postgres) +- [ASP.NET Core Docker Documentation](https://docs.microsoft.com/en-us/aspnet/core/host-and-deploy/docker/) +- [Supercronic Documentation](https://github.com/aptible/supercronic) +- [GitHub Container Registry Documentation](https://docs.github.com/en/packages/working-with-a-github-packages-registry/working-with-the-container-registry) +- [Purdue.io Wiki](https://github.com/Purdue-io/PurdueApi/wiki/) + +## Support + +For issues, questions, or contributions: +- [GitHub Issues](https://github.com/Purdue-io/PurdueApi/issues) +- [Contributing Guide](https://github.com/Purdue-io/PurdueApi/wiki/Contributing)