refactor : precommit 적용

seyoung4503 · seyoung4503 · commit 3e40b3393e3c · 2026-03-02T20:49:46.000+09:00
diff --git a/docs/BaseComponent_ko.md b/docs/BaseComponent_ko.md
@@ -190,13 +190,15 @@ retriever = FunctionalComponent(my_retriever, name="MyRetriever", hook=hook)
 
 ```python
 from lang2sql.core.hooks import MemoryHook
+from lang2sql.flows.baseline import SequentialFlow
+
 hook = MemoryHook()
 
-flow = BaselineFlow(steps=[...], hook=hook)  # 또는 컴포넌트마다 hook 주입
-out = flow.run_query("지난달 매출")
+flow = SequentialFlow(steps=[...], hook=hook)  # 또는 컴포넌트마다 hook 주입
+out = flow.run("지난달 매출")
 
 # 이벤트 확인
-for e in hook.events:
+for e in hook.snapshot():
     print(e.phase, e.component, e.duration_ms, e.error)
 ```
 
diff --git a/docs/Hook_and_exception_ko.md b/docs/Hook_and_exception_ko.md
@@ -111,16 +111,16 @@ class MemoryHook:
 
 #### MemoryHook 사용 예시
 
-```py
+```python
 from lang2sql.core.hooks import MemoryHook
-from lang2sql.flows.baseline import BaselineFlow
+from lang2sql.flows.baseline import SequentialFlow
 
 hook = MemoryHook()
-flow = BaselineFlow(steps=[...], hook=hook)
+flow = SequentialFlow(steps=[...], hook=hook)
 
-out = flow.run_query("지난달 매출")
+out = flow.run("지난달 매출")
 
-for e in hook.events:
+for e in hook.snapshot():
     print(e.name, e.phase, e.component, e.duration_ms, e.error)
 ```
 
diff --git a/docs/tutorials/getting-started-without-datahub.md b/docs/tutorials/getting-started-without-datahub.md
@@ -122,19 +122,53 @@ print(f"FAISS index saved to: {OUTPUT_DIR}/catalog.faiss")
 
 ### 4) 실행
 
+v2 CLI는 외부 벡터 인덱스 경로를 인수로 받지 않습니다.
+앞서 생성한 FAISS 인덱스를 활용하려면 Python API로 파이프라인을 직접 구성합니다.
+
+```python
+# run_query.py
+import os
+from dotenv import load_dotenv
+from lang2sql import CatalogChunker, VectorRetriever
+from lang2sql.integrations.db import SQLAlchemyDB
+from lang2sql.integrations.embedding import OpenAIEmbedding
+from lang2sql.integrations.llm import OpenAILLM
+from lang2sql.integrations.vectorstore import FAISSVectorStore
+from lang2sql.flows.hybrid import HybridNL2SQL
+
+load_dotenv()
+
+INDEX_DIR = "./dev/table_info_db"
+embedding = OpenAIEmbedding(
+    model=os.getenv("OPEN_AI_EMBEDDING_MODEL", "text-embedding-3-large"),
+    api_key=os.getenv("OPEN_AI_KEY"),
+)
+
+# FAISS 인덱스 로드 (주의: 이 예제에서는 store가 아래 HybridNL2SQL에 전달되지 않습니다 — 인덱스를 실제로 사용하려면 연결 방법을 확인하세요)
+store = FAISSVectorStore.load(f"{INDEX_DIR}/catalog.faiss")
+
+pipeline = HybridNL2SQL(
+    catalog=[],          # FAISS에 이미 인덱싱돼 있으므로 빈 리스트
+    llm=OpenAILLM(model=os.getenv("OPEN_AI_LLM_MODEL", "gpt-4o"), api_key=os.getenv("OPEN_AI_KEY")),
+    db=SQLAlchemyDB(os.getenv("DB_URL", "sqlite:///sample.db")),
+    embedding=embedding,
+    db_dialect=os.getenv("DB_TYPE", "sqlite"),
+)
+
+rows = pipeline.run("주문 수를 집계하는 SQL을 만들어줘")
+print(rows)
+```
+
+Streamlit UI:
+
 ```bash
-# Streamlit UI
 lang2sql run-streamlit
+```
 
-# CLI 예시 (FAISS 인덱스 사용)
-lang2sql query "주문 수를 집계하는 SQL을 만들어줘" \
-  --vectordb-type faiss \
-  --vectordb-location ./dev/table_info_db
+CLI (카탈로그 없이 동작하는 baseline 플로우만 지원):
 
-# CLI 예시 (pgvector)
-lang2sql query "주문 수를 집계하는 SQL을 만들어줘" \
-  --vectordb-type pgvector \
-  --vectordb-location "postgresql://pgvector:pgvector@localhost:5432/postgres"
+```bash
+lang2sql query "주문 수를 집계해줘" --flow baseline --dialect sqlite
 ```
 
 ### 5) (선택) pgvector로 적재하기
@@ -229,4 +263,3 @@ VectorRetriever.from_chunks(
 print(f"pgvector collection populated: {TABLE}")
 ```
 
-주의: FAISS 디렉토리 또는 pgvector 컬렉션이 없으면 현재 코드는 DataHub에서 메타데이터를 가져와 인덱스를 생성하려고 시도합니다. DataHub를 사용하지 않는 경우 위 절차로 사전에 VectorDB를 만들어 두세요.
diff --git a/src/lang2sql/__init__.py b/src/lang2sql/__init__.py
@@ -1,4 +1,9 @@
-from .factory import build_db_from_env, build_embedding_from_env, build_explorer_from_url, build_llm_from_env
+from .factory import (
+    build_db_from_env,
+    build_embedding_from_env,
+    build_explorer_from_url,
+    build_llm_from_env,
+)
 from .components.enrichment.context_enricher import ContextEnricher
 from .components.enrichment.question_profiler import QuestionProfiler
 from .components.execution.sql_executor import SQLExecutor
@@ -50,6 +55,7 @@
 from .integrations.llm.gemini_ import GeminiLLM
 from .integrations.llm.huggingface_ import HuggingFaceLLM
 from .integrations.llm.ollama_ import OllamaLLM
+
 __all__ = [
     # Data types
     "CatalogEntry",
@@ -132,15 +138,16 @@
 # ---------------------------------------------------------------------------
 _LAZY_IMPORTS: dict[str, tuple[str, str]] = {
     "DataHubCatalogLoader": (".integrations.catalog.datahub_", "DataHubCatalogLoader"),
-    "FAISSVectorStore":     (".integrations.vectorstore.faiss_", "FAISSVectorStore"),
-    "PGVectorStore":        (".integrations.vectorstore.pgvector_", "PGVectorStore"),
+    "FAISSVectorStore": (".integrations.vectorstore.faiss_", "FAISSVectorStore"),
+    "PGVectorStore": (".integrations.vectorstore.pgvector_", "PGVectorStore"),
 }
 
 
 def __getattr__(name: str):
     if name in _LAZY_IMPORTS:
         module_path, attr = _LAZY_IMPORTS[name]
         import importlib
+
         obj = getattr(importlib.import_module(module_path, package=__name__), attr)
         # Cache in module globals so subsequent accesses skip __getattr__
         globals()[name] = obj
diff --git a/src/lang2sql/core/ports.py b/src/lang2sql/core/ports.py
@@ -78,6 +78,8 @@ def list_tables(self, schema: str | None = None) -> list[str]: ...
 
     def get_ddl(self, table: str, *, schema: str | None = None) -> str: ...
 
-    def sample_data(self, table: str, *, limit: int = 5, schema: str | None = None) -> list[dict]: ...
+    def sample_data(
+        self, table: str, *, limit: int = 5, schema: str | None = None
+    ) -> list[dict]: ...
 
     def execute_read_only(self, sql: str) -> list[dict]: ...
diff --git a/src/lang2sql/integrations/db/sqlalchemy_.py b/src/lang2sql/integrations/db/sqlalchemy_.py
@@ -32,7 +32,17 @@ def execute(self, sql: str) -> list[dict[str, Any]]:
 
 
 _WRITE_PREFIXES = frozenset(
-    {"INSERT", "UPDATE", "DELETE", "DROP", "ALTER", "CREATE", "TRUNCATE", "REPLACE", "MERGE"}
+    {
+        "INSERT",
+        "UPDATE",
+        "DELETE",
+        "DROP",
+        "ALTER",
+        "CREATE",
+        "TRUNCATE",
+        "REPLACE",
+        "MERGE",
+    }
 )
 
 
@@ -51,7 +61,9 @@ def __init__(self, url: str, *, schema: str | None = None) -> None:
         self._schema = schema
 
     @classmethod
-    def from_engine(cls, engine: "Engine", *, schema: str | None = None) -> "SQLAlchemyExplorer":
+    def from_engine(
+        cls, engine: "Engine", *, schema: str | None = None
+    ) -> "SQLAlchemyExplorer":
         """기존 engine 공유용. 연결 풀 중복 방지."""
         instance = cls.__new__(cls)
         instance._engine = engine
@@ -86,7 +98,9 @@ def get_ddl(self, table: str, *, schema: str | None = None) -> str:
         t = SATable(table, metadata, autoload_with=self._engine, schema=resolved_schema)
         return str(CreateTable(t).compile(self._engine))
 
-    def sample_data(self, table: str, *, limit: int = 5, schema: str | None = None) -> list[dict]:
+    def sample_data(
+        self, table: str, *, limit: int = 5, schema: str | None = None
+    ) -> list[dict]:
         """실제 샘플 데이터 반환.
 
         f-string SQL 금지 — SQLAlchemy ORM select()로 identifier quoting 위임.
diff --git a/tests/test_components_vector_retriever.py b/tests/test_components_vector_retriever.py
@@ -535,7 +535,9 @@ def test_save_and_load_returns_same_results(tmp_path):
 
     store = FAISSVectorStore(index_path=path + ".faiss")
     chunks = CatalogChunker().split(CATALOG)
-    original = VectorRetriever.from_chunks(chunks, embedding=embedding, vectorstore=store)
+    original = VectorRetriever.from_chunks(
+        chunks, embedding=embedding, vectorstore=store
+    )
     original.save(path)
 
     loaded_store = FAISSVectorStore.load(path)
@@ -555,7 +557,9 @@ def test_load_registry_intact(tmp_path):
 
     store = FAISSVectorStore(index_path=path + ".faiss")
     chunks = CatalogChunker().split(CATALOG)
-    original = VectorRetriever.from_chunks(chunks, embedding=embedding, vectorstore=store)
+    original = VectorRetriever.from_chunks(
+        chunks, embedding=embedding, vectorstore=store
+    )
     original.save(path)
 
     loaded_store = FAISSVectorStore.load(path)
@@ -571,7 +575,9 @@ def test_save_raises_for_inmemory():
     """InMemoryVectorStore는 save()를 지원하지 않아 NotImplementedError가 발생한다."""
     embedding = FakeEmbeddingFAISS()
     chunks = CatalogChunker().split(CATALOG)
-    retriever = VectorRetriever.from_chunks(chunks, embedding=embedding)  # InMemory 기본값
+    retriever = VectorRetriever.from_chunks(
+        chunks, embedding=embedding
+    )  # InMemory 기본값
 
     with pytest.raises(NotImplementedError, match="does not support save"):
         retriever.save("/tmp/should_not_exist")
diff --git a/tests/test_integrations_sqlalchemy_explorer.py b/tests/test_integrations_sqlalchemy_explorer.py
@@ -5,11 +5,11 @@
 import pytest
 from sqlalchemy import create_engine, text
 
-
 # ---------------------------------------------------------------------------
 # Fixture: SQLite in-memory DB with FK schema
 # ---------------------------------------------------------------------------
 
+
 @pytest.fixture()
 def engine():
     eng = create_engine("sqlite:///:memory:")
@@ -29,7 +29,9 @@ def engine():
                 status TEXT DEFAULT 'pending'
             )
         """))
-        conn.execute(text("INSERT INTO customers VALUES (1, 'Alice', 'alice@example.com')"))
+        conn.execute(
+            text("INSERT INTO customers VALUES (1, 'Alice', 'alice@example.com')")
+        )
         conn.execute(text("INSERT INTO customers VALUES (2, 'Bob', 'bob@example.com')"))
         conn.execute(text("INSERT INTO orders VALUES (1, 1, 99.9, 'shipped')"))
         conn.execute(text("INSERT INTO orders VALUES (2, 2, 42.0, 'pending')"))
@@ -48,6 +50,7 @@ def explorer(engine):
 # Tests
 # ---------------------------------------------------------------------------
 
+
 def test_list_tables(explorer):
     tables = explorer.list_tables()
     assert set(tables) == {"customers", "orders"}
@@ -98,7 +101,9 @@ def test_execute_read_only_select(explorer):
 
 def test_execute_read_only_rejects_insert(explorer):
     with pytest.raises(ValueError, match="Write operations not allowed"):
-        explorer.execute_read_only("INSERT INTO customers VALUES (3, 'Eve', 'eve@x.com')")
+        explorer.execute_read_only(
+            "INSERT INTO customers VALUES (3, 'Eve', 'eve@x.com')"
+        )
 
 
 def test_execute_read_only_rejects_drop(explorer):