UCL-CORU · zmek · Feb 6, 2026 · Jan 20, 2026 · Jan 21, 2026 · Feb 6, 2026
@@ -512,7 +512,7 @@ from patientflow.viz.shap import plot_shap
 plot_shap(
     trained_models,
     test_visits,
-    exclude_from_training_data)
+    exclude_from_training_data=exclude_from_training_data)
 
 
 ```

@@ -47,7 +47,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -58,7 +58,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -102,7 +102,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -145,7 +145,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -219,7 +219,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -277,7 +277,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -312,7 +312,6 @@
     "        df=test_visits, \n",
     "        prediction_time=_prediction_time, \n",
     "        single_snapshot_per_visit=False,\n",
-    "        exclude_columns=exclude_from_training_data, \n",
     "        visit_col='visit_number'\n",
     "    )\n",
     "\n",
@@ -338,7 +337,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -380,7 +379,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -435,7 +434,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -479,7 +478,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {

@@ -52,12 +52,18 @@
 )
 from patientflow.model_artifacts import TrainedClassifier
 
-warnings.filterwarnings("ignore", category=pd.errors.SettingWithCopyWarning)
+# SettingWithCopyWarning was removed in pandas 3.0 (Copy-on-Write is now the default)
+if hasattr(pd.errors, "SettingWithCopyWarning"):
+    warnings.filterwarnings("ignore", category=pd.errors.SettingWithCopyWarning)
 
 
 def add_missing_columns(pipeline, df):
     """Add missing columns required by the prediction pipeline from the training data.
 
+    This is a legacy function for older model artifacts that don't include
+    FeatureColumnTransformer in their pipeline. For newer models, the pipeline
+    handles column selection automatically via the transformer step.
+
     Parameters
     ----------
     pipeline : sklearn.pipeline.Pipeline
@@ -78,6 +84,9 @@ def add_missing_columns(pipeline, df):
     - latest_ : pd.NA
     - arrival_method : "None"
     - others : pd.NA
+
+    For newer models with FeatureColumnTransformer in the pipeline, this function
+    is not needed as the transformer handles column selection automatically.
     """
     # check input data for missing columns
     column_transformer = pipeline.named_steps["feature_transformer"]
@@ -435,9 +444,15 @@ def create_predictions(
         pipeline = classifier.pipeline
 
     # Add missing columns expected by the model
-    prediction_snapshots = add_missing_columns(pipeline, prediction_snapshots)
-
-    # Before we get predictions, we need to create a temp copy with the elapsed_los column in seconds
+    # For new models with FeatureColumnTransformer, the pipeline handles column selection automatically.
+    # For legacy models without the transformer, use the external helper function.
+    if "feature_columns" not in pipeline.named_steps:
+        # Legacy path: use external helper for older model artifacts
+        prediction_snapshots = add_missing_columns(pipeline, prediction_snapshots)
+
+    # Before we get predictions, we need to create a temp copy with the elapsed_los column in seconds.
+    # In the training data, elapsed_los is stored as seconds, so this conversion ensures
+    # the model sees the same representation at inference time.
     prediction_snapshots_temp = prediction_snapshots.copy()
     prediction_snapshots_temp["elapsed_los"] = prediction_snapshots_temp[
         "elapsed_los"

@@ -567,6 +567,25 @@ def prepare_patient_snapshots(
     # Filter by the time of day while keeping the original index
     df_tod = df[df["prediction_time"] == prediction_time].copy()
 
+    # Provide a helpful error message if no snapshots match the requested
+    # prediction_time. Downstream model training (e.g. time-series CV) fails
+    # with a less informative error when given an empty dataset.
+    if df_tod.empty:
+        available_times = sorted(df["prediction_time"].unique())
+        arg_type = type(prediction_time).__name__
+        col_dtype = df["prediction_time"].dtype
+        raise ValueError(
+            "No patient snapshots found for prediction_time "
+            f"{prediction_time}. "
+            "A common cause is a type/format mismatch between the inputs: "
+            f"(type of `prediction_time` argument: {arg_type}; "
+            f"dtype of `df['prediction_time']` column: {col_dtype}). "
+            "Check that the value you passed matches one of the "
+            f"available `prediction_time` values in the dataset: {available_times}. "
+            "If the types and formats match, another possibility is that there are "
+            "no visits with a snapshot at this time of day."
+        )
+
     if single_snapshot_per_visit:
         # Select one row for each visit
         df_single = select_one_snapshot_per_visit(df_tod, visit_col)