-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.py
More file actions
64 lines (50 loc) · 2.27 KB
/
config.py
File metadata and controls
64 lines (50 loc) · 2.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from pathlib import Path
from typing import Optional
# Directory layout: every working directory is anchored at the folder that
# contains this module.
ROOT_DIR = Path(__file__).parent.resolve()
TRANSCRIPTS_DIR = ROOT_DIR.joinpath("transcripts")
PROMPTS_DIR = ROOT_DIR.joinpath("prompts")
DATA_DIR = ROOT_DIR.joinpath("data")

# Model configuration. VERSION_ID is "<model>_<prompt version>" and is used
# by the analysis path helpers below as the stem of the analysis file names.
MODEL_NAME = "gemini-2.5-pro-latest"
PROMPT_VERSION = "v1"
VERSION_ID = "_".join((MODEL_NAME, PROMPT_VERSION))

# Default channel for testing
DEFAULT_CHANNEL_ID = "UC3l6H774PloAg3XEz46-KQg"

# Batch processing configuration
BATCH_SIZE = 500  # WebShare's concurrency limit

# File naming patterns. ERROR_FILE_PATTERN is a str.format template that
# expects a ``video_id`` keyword.
TRANSCRIPT_FILENAME = "transcript.txt"
ERROR_FILE_PATTERN = "{video_id}_error.txt"
DOWNLOAD_REPORT_FILENAME = "download_report.json"
def get_transcript_path(channel_id: str, video_id: str) -> Path:
    """Return the location of a single video's transcript file.

    The result is ``TRANSCRIPTS_DIR/<channel_id>/<video_id>/TRANSCRIPT_FILENAME``.
    Nothing is created on disk; only the path is built.
    """
    return TRANSCRIPTS_DIR.joinpath(channel_id, video_id, TRANSCRIPT_FILENAME)
def get_analysis_path(channel_id: str, video_id: str) -> Path:
    """Return the location of a single video's analysis file.

    The file is named ``<VERSION_ID>.txt`` and sits in
    ``TRANSCRIPTS_DIR/<channel_id>/<video_id>/``.
    """
    analysis_name = f"{VERSION_ID}.txt"
    return TRANSCRIPTS_DIR.joinpath(channel_id, video_id, analysis_name)
def get_channel_analysis_path(channel_id: str) -> Path:
    """Return the location of a channel's aggregated analysis file.

    The file is named ``<VERSION_ID>.txt`` and sits directly inside the
    channel's directory, ``TRANSCRIPTS_DIR/<channel_id>/``.
    """
    channel_dir = TRANSCRIPTS_DIR / channel_id
    return channel_dir / f"{VERSION_ID}.txt"
def get_global_analysis_path() -> Path:
    """Return the location of the global aggregated analysis file.

    The file is named ``<VERSION_ID>.txt`` and sits directly inside
    ``TRANSCRIPTS_DIR``.
    """
    global_name = f"{VERSION_ID}.txt"
    return TRANSCRIPTS_DIR.joinpath(global_name)
def get_error_path(channel_id: str, video_id: str) -> Path:
    """Return the location of a video's error log file.

    Error logs for a channel share one ``errors`` directory under
    ``TRANSCRIPTS_DIR/<channel_id>/``; the file name is produced by filling
    ``ERROR_FILE_PATTERN`` with the video id.
    """
    errors_root = TRANSCRIPTS_DIR.joinpath(channel_id, "errors")
    return errors_root.joinpath(ERROR_FILE_PATTERN.format(video_id=video_id))
def get_download_report_path(channel_id: str) -> Path:
    """Return the location of a channel's download report file.

    The result is ``TRANSCRIPTS_DIR/<channel_id>/DOWNLOAD_REPORT_FILENAME``.
    """
    return TRANSCRIPTS_DIR.joinpath(channel_id, DOWNLOAD_REPORT_FILENAME)
def get_prompt_path(version: Optional[str] = None) -> Path:
    """Return the location of a prompt file.

    Args:
        version: Prompt version to look up. Any falsy value (``None`` or an
            empty string) selects ``PROMPT_VERSION`` instead.

    Returns:
        ``PROMPTS_DIR/<version>.txt``.
    """
    if not version:
        version = PROMPT_VERSION
    return PROMPTS_DIR.joinpath(f"{version}.txt")
# Create necessary directories
def ensure_directories():
    """Make sure the transcripts, prompts and data directories exist.

    Missing parent directories are created as well, and directories that
    already exist are left untouched.
    """
    for directory in (TRANSCRIPTS_DIR, PROMPTS_DIR, DATA_DIR):
        directory.mkdir(parents=True, exist_ok=True)
# Import-time side effect: importing this module runs ensure_directories(),
# which creates the transcripts, prompts and data directories if missing.
ensure_directories()