Skip to content

Commit e99fd66

Browse files
committed
Merge v10 polish: tfhub recovery + IGV HF fallback + FTO doc + bgzip PATH
2 parents 68c3945 + ab3cdba commit e99fd66

7 files changed

Lines changed: 274 additions & 12 deletions

File tree

README.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -854,6 +854,35 @@ chorus health
854854

855855
Add the export to your shell rc file if you want it persistent.
856856

857+
#### Enformer fails with `saved_model.pb` not found after a partial download
858+
859+
TensorFlow Hub caches downloaded models at
860+
`/var/folders/.../T/tfhub_modules/` (macOS) or `/tmp/tfhub_modules/`
861+
(Linux). **This cache is outside `~/.chorus/` and survives chorus
862+
teardowns.** If an earlier Enformer download was interrupted, the
863+
cached directory ends up missing `saved_model.pb` and Enformer fails
864+
to load with:
865+
866+
```
867+
Trying to load a model of incompatible/unknown type. ... contains
868+
neither 'saved_model.pb' nor 'saved_model.pbtxt'.
869+
```
870+
871+
Clear the stale cache and retry:
872+
873+
```bash
874+
# macOS
875+
rm -rf /var/folders/*/*/T/tfhub_modules
876+
877+
# Linux
878+
rm -rf /tmp/tfhub_modules
879+
```
880+
881+
Chorus auto-detects the corrupted-cache case and clears it on the
882+
next `load_pretrained_model()` call, so this is only an issue if the
883+
first attempt after a fresh install fails — the second attempt will
884+
recover automatically.
885+
857886
### Memory Issues
858887
Some oracles require significant memory (~8–16 GB) for predictions. Solutions:
859888
- Force CPU usage: `device='cpu'`

chorus/__init__.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,22 @@
77

88
__version__ = "0.1.0"
99

10+
# ---------------------------------------------------------------------------
# PATH guard for subprocess tools (bgzip, tabix, samtools)
# ---------------------------------------------------------------------------
# If chorus is imported from an interpreter that was not launched through
# ``mamba activate chorus`` (e.g. an outer script invoking
# ``python -m jupyter nbconvert``), subprocesses inherit a PATH without the
# conda env's ``bin/`` directory, so coolbox reports bgzip/tabix as missing
# and spams ERROR lines before falling back to its in-memory reader. The
# tools are installed by ``environment.yml``; prepending the interpreter's
# own bin directory is enough to make them discoverable again.
import os as _os
import sys as _sys

_env_bin = _os.path.dirname(_sys.executable)
_current_path = _os.environ.get("PATH", "")
if _env_bin and _env_bin not in _current_path.split(_os.pathsep):
    _os.environ["PATH"] = _os.pathsep.join((_env_bin, _current_path))
25+
1026
# Import core classes
1127
from .core import (
1228
OracleBase,

chorus/analysis/_igv_report.py

Lines changed: 43 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,26 +29,63 @@
2929
# CDN <script> tag so report generation still succeeds.
_IGV_CDN = "https://cdn.jsdelivr.net/npm/igv@3.1.1/dist/igv.min.js"
_IGV_LOCAL = Path.home() / ".chorus" / "lib" / "igv.min.js"
# HuggingFace mirror — secondary fallback for environments where an
# institutional proxy MITMs TLS, which breaks stdlib ``urllib`` while
# ``huggingface_hub`` (httpx + certifi) keeps working. The file must
# exist in the dataset; if it doesn't, the fallback gracefully no-ops.
_IGV_HF_REPO = "lucapinello/chorus-backgrounds"
_IGV_HF_FILENAME = "igv.min.js"
3238

3339

3440
def _ensure_igv_local() -> Path | None:
    """Ensure ``_IGV_LOCAL`` exists; download it on first use.

    Two sources are tried in order: (1) the CDN via stdlib ``urllib``
    (``download_with_resume``), then (2) the HuggingFace mirror via
    ``huggingface_hub`` — the latter often works on SSL-MITM
    institutional networks where stdlib ``urllib`` rejects the proxy's
    self-signed cert but ``httpx + certifi`` accepts it.

    Returns the local path when the file is available, ``None`` if
    both downloads failed (callers then fall back to a CDN <script>
    tag in the rendered HTML).
    """
    if _IGV_LOCAL.exists() and _IGV_LOCAL.stat().st_size > 0:
        return _IGV_LOCAL
    _IGV_LOCAL.parent.mkdir(parents=True, exist_ok=True)

    # Attempt 1: CDN via stdlib urllib.
    try:
        from chorus.utils.http import download_with_resume
        download_with_resume(_IGV_CDN, _IGV_LOCAL, label="igv.min.js")
        if _IGV_LOCAL.exists() and _IGV_LOCAL.stat().st_size > 0:
            logger.info("Cached igv.min.js from CDN to %s.", _IGV_LOCAL)
            return _IGV_LOCAL
    except Exception as exc:
        logger.debug("CDN fetch of igv.min.js failed (%s); trying HF mirror.", exc)

    # Attempt 2: HuggingFace mirror (works through SSL-MITM proxies
    # where stdlib urllib fails — huggingface_hub uses httpx+certifi).
    try:
        from huggingface_hub import hf_hub_download
        fetched = Path(
            hf_hub_download(
                _IGV_HF_REPO,
                filename=_IGV_HF_FILENAME,
                repo_type="dataset",
                local_dir=str(_IGV_LOCAL.parent),
            )
        )
        # hf_hub_download reports where it actually wrote the file;
        # normalize to the canonical _IGV_LOCAL location when different.
        if fetched != _IGV_LOCAL and fetched.exists():
            fetched.replace(_IGV_LOCAL)
        if _IGV_LOCAL.exists() and _IGV_LOCAL.stat().st_size > 0:
            logger.info("Cached igv.min.js from HuggingFace mirror to %s.", _IGV_LOCAL)
            return _IGV_LOCAL
    except Exception as exc:
        logger.warning(
            "Could not pre-cache igv.min.js from CDN or HF mirror (%s); "
            "reports will reference %s at view time.",
            exc, _IGV_CDN,
        )
    return None

chorus/oracles/enformer.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,35 @@
1616

1717
logger = logging.getLogger(__name__)
1818

19+
20+
def _load_enformer_with_tfhub_recovery(hub, weights: str):
    """Call ``hub.load(weights)``, retrying once after wiping a corrupt cache.

    TensorFlow Hub stores downloaded models under ``tfhub_modules/<hash>/``.
    An interrupted first download leaves that directory present but without
    ``saved_model.pb``/``saved_model.pbtxt``, and ``hub.load`` then raises
    ``ValueError`` quoting the offending path in its message. When that exact
    signature is seen, the stale directory is deleted and the load is
    attempted one more time; every other failure propagates untouched.
    """
    import re
    import shutil

    try:
        return hub.load(weights)
    except Exception as exc:
        message = str(exc)
        # Only the missing-saved_model failure is recoverable.
        if "saved_model.pb" not in message:
            raise
        found = re.search(r"'([^']*tfhub_modules[^']+)'", message)
        if found is None:
            raise
        stale_dir = found.group(1)
        if os.path.isdir(stale_dir):
            logger.warning("Clearing corrupt tfhub cache at %s", stale_dir)
            shutil.rmtree(stale_dir, ignore_errors=True)
        return hub.load(weights)
46+
47+
1948
class EnformerOracle(OracleBase):
2049
"""Enformer oracle with automatic environment management."""
2150

@@ -133,7 +162,7 @@ def _load_direct(self, weights: str):
133162
logger.info("No GPU detected, using CPU")
134163

135164
os.environ["TFHUB_DOWNLOAD_PROGRESS"] = "1"
136-
enformer = hub.load(weights)
165+
enformer = _load_enformer_with_tfhub_recovery(hub, weights)
137166
self._enformer_model = enformer.model
138167
self.model = self._enformer_model
139168
self._load_track_metadata()

chorus/oracles/enformer_source/templates/load_template.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,28 @@
3232
else:
3333
print("No GPU detected, using CPU")
3434

35-
# Load the model
36-
enformer = hub.load(args['model_weights'])
35+
# Load the model. If tfhub's on-disk cache is corrupt (an incomplete
# download from a previous session leaves the module directory missing
# saved_model.pb), hub.load raises "contains neither 'saved_model.pb' nor
# 'saved_model.pbtxt'". Detect that signature, delete the bad cache
# directory, and retry a single time.
def _load_with_tfhub_recovery(weights: str):
    import re, shutil
    try:
        return hub.load(weights)
    except Exception as exc:
        message = str(exc)
        if "saved_model.pb" not in message:
            raise
        found = re.search(r"'([^']*tfhub_modules[^']+)'", message)
        if not found:
            raise
        bad_dir = found.group(1)
        if os.path.isdir(bad_dir):
            print(f"Clearing corrupt tfhub cache at {{bad_dir}}")
            shutil.rmtree(bad_dir, ignore_errors=True)
        return hub.load(weights)

enformer = _load_with_tfhub_recovery(args['model_weights'])
# Get the actual model from the enformer object
model = enformer.model
3959

examples/applications/variant_analysis/FTO_rs1421085/README.md

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,30 @@ to energy-storing white fat (Claussnitzer et al., NEJM 2015).
1010

1111
## Tracks
1212

13-
Predicted with AlphaGenome using subcutaneous adipose tissue and
14-
adipose-derived mesenchymal stem cell tracks:
15-
- ATAC (adipose tissue), CAGE (+/-), RNA-seq (+/-)
13+
The committed `example_output.md` was generated with **HepG2 liver tracks**
14+
(DNASE, CEBPA/CEBPB ChIP, H3K27ac, CAGE) as a "nearest available
15+
metabolic cell type" — matching the example prompt. HepG2 is not the
16+
causal tissue: rs1421085 acts in adipocyte progenitors and IRX3 sits
17+
~500 kb away, so expect **minimal effects** in the HepG2 run. The
18+
example is included to show what a "no-signal" call looks like.
19+
20+
**For a scientifically ideal run**, switch to AlphaGenome's adipose
21+
tracks by changing the `assay_ids` in the prompt:
22+
23+
```python
24+
# Subcutaneous adipose tissue + adipose-derived mesenchymal stem cells
25+
assay_ids = [
26+
"ATAC/UBERON:0002190 ATAC-seq/.", # subcutaneous adipose
27+
"ATAC/CL:0002540 ATAC-seq/.", # ADMSC
28+
"CAGE/hCAGE UBERON:0002190/+", "CAGE/hCAGE UBERON:0002190/-",
29+
"RNA_SEQ/UBERON:0002190 polyA plus RNA-seq/+",
30+
"RNA_SEQ/UBERON:0002190 polyA plus RNA-seq/-",
31+
]
32+
```
1633

1734
## Biology
1835

1936
The effect is highly cell-type-specific to adipocyte progenitors. IRX3 is
2037
~500kb from the variant — only visible with AlphaGenome's 1Mb window.
38+
In the HepG2 run you'll see "No strong regulatory effects detected" —
39+
this is the **correct** result: the variant doesn't act in liver.

tests/test_error_recovery.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,3 +202,115 @@ def test_missing_oracle_env_falls_back_gracefully(self, caplog):
202202
assert "chorus setup --oracle enformer" in msgs, (
203203
"log must quote the exact command the user needs to run"
204204
)
205+
206+
207+
# ---------------------------------------------------------------------------
208+
# v10 additions
209+
# ---------------------------------------------------------------------------
210+
211+
class TestTFHubCorruptCacheRecovery:
    """A partial tfhub download leaves ``tfhub_modules/<hash>/`` without
    ``saved_model.pb``; ``_load_enformer_with_tfhub_recovery`` must wipe
    that directory and call ``hub.load`` a second time."""

    def test_corrupt_cache_is_cleared_and_retry_succeeds(self, tmp_path):
        from chorus.oracles.enformer import _load_enformer_with_tfhub_recovery

        corrupt = tmp_path / "tfhub_modules" / "corrupt"
        corrupt.mkdir(parents=True)
        (corrupt / "variables").mkdir()  # partial download: no saved_model.pb

        attempts = []

        class FakeHub:
            def load(self, weights):
                attempts.append(weights)
                if len(attempts) == 1:
                    raise ValueError(
                        f"Trying to load a model of incompatible/unknown type. "
                        f"'{corrupt}' contains neither 'saved_model.pb' "
                        f"nor 'saved_model.pbtxt'."
                    )
                return {"loaded": True}

        loaded = _load_enformer_with_tfhub_recovery(FakeHub(), "https://tfhub.dev/enformer")
        assert loaded == {"loaded": True}
        assert len(attempts) == 2, "should retry exactly once"
        assert not corrupt.exists(), "corrupt cache dir must be removed before retry"

    def test_unrelated_errors_propagate_unchanged(self):
        from chorus.oracles.enformer import _load_enformer_with_tfhub_recovery

        class FakeHub:
            def load(self, weights):
                raise RuntimeError("network unreachable")

        with pytest.raises(RuntimeError, match="network unreachable"):
            _load_enformer_with_tfhub_recovery(FakeHub(), "https://tfhub.dev/enformer")
247+
248+
249+
class TestIGVFallbackViaHuggingFace:
    """When stdlib urllib fails (SSL MITM), ``_ensure_igv_local`` must
    try the HuggingFace mirror as a second fallback before giving up."""

    def test_hf_fallback_when_cdn_fails(self, tmp_path, monkeypatch):
        from chorus.analysis import _igv_report

        monkeypatch.setattr(_igv_report, "_IGV_LOCAL", tmp_path / "igv.min.js")

        # Simulate the CDN path dying with a certificate error, exactly
        # as stdlib urllib does behind a TLS-intercepting proxy.
        def broken_cdn(url, dest, **kwargs):
            import ssl
            raise ssl.SSLError("CERTIFICATE_VERIFY_FAILED")

        monkeypatch.setattr(
            "chorus.utils.http.download_with_resume", broken_cdn
        )

        # The HF mirror succeeds and writes into the requested local_dir.
        recorded = []

        def working_hf(repo_id, filename, repo_type, local_dir, **kwargs):
            recorded.append((repo_id, filename, repo_type, local_dir))
            out = Path(local_dir) / "igv.min.js"
            out.parent.mkdir(parents=True, exist_ok=True)
            out.write_text("// fake igv.min.js payload " * 50)
            return str(out)

        import huggingface_hub as _hfh
        monkeypatch.setattr(_hfh, "hf_hub_download", working_hf)

        got = _igv_report._ensure_igv_local()
        assert got is not None
        assert got == tmp_path / "igv.min.js"
        assert got.exists()
        assert len(recorded) == 1
        assert recorded[0][0] == "lucapinello/chorus-backgrounds"
        assert recorded[0][1] == "igv.min.js"

    def test_returns_none_when_both_fail(self, tmp_path, monkeypatch):
        from chorus.analysis import _igv_report

        monkeypatch.setattr(_igv_report, "_IGV_LOCAL", tmp_path / "igv.min.js")

        def broken_cdn(url, dest, **kwargs):
            raise RuntimeError("cdn fail")

        monkeypatch.setattr(
            "chorus.utils.http.download_with_resume", broken_cdn
        )

        import huggingface_hub as _hfh

        def broken_hf(*args, **kwargs):
            raise FileNotFoundError("hf fail")

        monkeypatch.setattr(_hfh, "hf_hub_download", broken_hf)

        assert _igv_report._ensure_igv_local() is None
301+
302+
303+
class TestChorusImportPatchesPath:
    """Importing chorus must prepend the Python env's bin/ to PATH so
    coolbox subprocess calls find bgzip/tabix when nbconvert is
    launched outside ``mamba activate``."""

    def test_env_bin_on_path_after_import(self):
        import os
        import sys

        import chorus  # noqa: F401

        expected_bin = os.path.dirname(sys.executable)
        path_entries = os.environ["PATH"].split(os.pathsep)
        assert expected_bin in path_entries, (
            f"{expected_bin} must be on PATH after importing chorus so coolbox "
            f"can find bgzip/tabix"
        )

0 commit comments

Comments
 (0)