Skip to content

Commit 567a9f8

Browse files
authored
Merge pull request #1 from chaoticgoodcomputing/feat/refactor-api-surface-for-clarity
feat: refactor api surface for clarity
2 parents 9853bd6 + 1bd3624 commit 567a9f8

597 files changed

Lines changed: 64989 additions & 3635 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.config/dotnet-tools.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,13 @@
1616
"dotnet-script"
1717
],
1818
"rollForward": false
19+
},
20+
"docfx": {
21+
"version": "2.78.5",
22+
"commands": [
23+
"docfx"
24+
],
25+
"rollForward": false
1926
}
2027
}
2128
}

.vscode/flowthru.code-snippets

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,16 @@
1010
"scope": "csharp",
1111
"prefix": "_pipeline",
1212
"body": [
13-
"using Flowthru.Pipelines;",
13+
"using Flowthru.Flows;",
1414
"using $1",
1515
"",
1616
"public static class ${RELATIVE_FILEPATH/.*?(\\w+).cs$/$1/}",
1717
"{",
1818
" public static Pipeline Create(Catalog catalog)",
1919
" {",
20-
" return PipelineBuilder.CreatePipeline(pipeline =>",
20+
" return FlowBuilder.CreateFlow(pipeline =>",
2121
" {",
22-
" pipeline.AddNode(",
22+
" pipeline.AddStep(",
2323
" label: \"$2\",",
2424
" description: \"\"\"",
2525
" $3",
@@ -34,7 +34,7 @@
3434
],
3535
"description": "Creates a basic Flowthru pipeline structure."
3636
},
37-
"FlowthruNode": {
37+
"FlowthruStep": {
3838
"scope": "csharp",
3939
"prefix": "_node",
4040
"body": [

CONTRIBUTING.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,8 @@ Flowthru handles these through an **effect type** called `FlowIO<T>`. If you're
7474
The key runtime guarantees:
7575

7676
- **All I/O is lazy and explicit.** Side effects cannot be accidentally dropped or silently ignored.
77-
- **Errors are captured, never swallowed.** Node failures propagate to structured pipeline results. Silent `catch {}` blocks are a bug.
78-
- **Nodes are isolated.** A failing node halts execution and reports which node failed and why — partial silent failures are not possible.
77+
- **Errors are captured, never swallowed.** Step failures propagate to structured pipeline results. Silent `catch {}` blocks are a bug.
78+
- **Steps are isolated.** A failing node halts execution and reports which node failed and why — partial silent failures are not possible.
7979

8080
## Decision Rules for Contributors
8181

Directory.Build.props

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@
1616
<RestorePackagesWithLockFile>false</RestorePackagesWithLockFile>
1717
</PropertyGroup>
1818

19+
<!-- Generate XML documentation for src/ projects (consumed by DocFX) -->
20+
<PropertyGroup Condition="$(MSBuildProjectDirectory.Contains('src'))">
21+
<GenerateDocumentationFile>true</GenerateDocumentationFile>
22+
</PropertyGroup>
23+
1924
<!-- Centralized NuGet package configuration for all src/ projects -->
2025
<PropertyGroup Condition="$(MSBuildProjectDirectory.Contains('src'))">
2126
<Version>0.1.33</Version>

docs/CONTRIBUTING.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ Consider these examples:
2727
- **"How do I start writing my first pipeline?"** → This is someone learning. They need a tutorial.
2828
- **"How can I read data from a database instead of local files?"** → This is someone working with a specific need. They need a guide.
2929
- **"Why does Flowthru use so many types?"** → This is someone studying the framework's design. They need an explanation.
30-
- **"What parameters does `CatalogEntry` accept?"** → This is someone looking up technical details. They need reference documentation.
30+
- **"What parameters does `Item` accept?"** → This is someone looking up technical details. They need reference documentation.
3131

3232
The same topic can yield different documentation depending on the question:
3333

@@ -156,7 +156,7 @@ Let's say you want to document "catalog entries." Start by listing questions:
156156
- "How do I create my first catalog entry?" → Tutorial
157157
- "How do I configure a catalog entry for Parquet files?" → Guide
158158
- "Why are catalog entries properties rather than string keys?" → Explanation
159-
- "What methods are available on `ICatalogEntry<T>`?" → Reference
159+
- "What methods are available on `IItem<T>`?" → Reference
160160

161161
Each question becomes a separate piece of documentation in its appropriate category. Together, they serve users at every stage of their journey with catalog entries.
162162

docs/explanation/advanced/storage-composition.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -140,11 +140,11 @@ public class EFCoreStorageAdapter<T> : IStorageAdapter<IEnumerable<T>>
140140

141141
## Catalog-Level Constraint Narrowing
142142

143-
After adapter creation, pipeline authors can further constrain catalog entries using `CatalogEntry<T>.Constrain()`:
143+
After adapter creation, pipeline authors can further constrain catalog entries using `Item<T>.Constrain()`:
144144

145145
```csharp
146-
public ICatalogEntry<IEnumerable<Company>> Companies => GetOrCreateEntry(() =>
147-
CatalogEntries.Enumerable.Csv<Company>("companies", "data/companies.csv")
146+
public IItem<IEnumerable<Company>> Companies => GetOrCreateEntry(() =>
147+
ItemFactory.Enumerable.Csv<Company>("companies", "data/companies.csv")
148148
.Constrain(traits => traits with { CanWrite = false })
149149
);
150150
```
@@ -201,7 +201,7 @@ The trait system solves all three:
201201

202202
- **Composable**: Eight independent boolean flags
203203
- **Queryable**: `adapter.Traits.CanWrite` checked before operation
204-
- **Fail-fast**: `CatalogEntry.Constrain()` validates at initialization
204+
- **Fail-fast**: `Item.Constrain()` validates at initialization
205205

206206
The old interfaces remain as `[Obsolete]` for backward compatibility but have no effect on pipeline behavior.
207207

@@ -211,9 +211,9 @@ The storage architecture is implemented in:
211211

212212
- `src/core/Flowthru/Data/Capabilities/StorageTraits.cs` — the trait record
213213
- `src/core/Flowthru/Data/Storage/ComposedStorageAdapter.cs` — trait merging logic
214-
- `src/core/Flowthru/Data/CatalogEntry.cs``Constrain()` with ratchet validation
214+
- `src/core/Flowthru/Data/Item.cs``Constrain()` with ratchet validation
215215
- `src/core/Flowthru/Data/Storage/Medium/` — medium implementations with traits
216216
- `src/core/Flowthru/Data/Storage/Format/` — format serializers with traits
217217

218-
Catalog entry factories (`CatalogEntries.Enumerable.Csv<T>(...)`) construct composed adapters with sensible defaults. Extension authors creating custom adapters should declare traits that accurately reflect their capabilities.
218+
Catalog entry factories (`ItemFactory.Enumerable.Csv<T>(...)`) construct composed adapters with sensible defaults. Extension authors creating custom adapters should declare traits that accurately reflect their capabilities.
219219

docs/explanation/anatomy-of-a-pipeline.md

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,9 @@ In this example:
7575
// Data/_01_Raw/Catalog.Raw.cs
7676
public partial class Catalog
7777
{
78-
public ICatalogEntry<IEnumerable<IrisRawSchema>> IrisRaw =>
78+
public IItem<IEnumerable<IrisRawSchema>> IrisRaw =>
7979
GetOrCreateEntry(() =>
80-
CatalogEntries.Enumerable.Csv<IrisRawSchema>(
80+
ItemFactory.Enumerable.Csv<IrisRawSchema>(
8181
label: "IrisRaw",
8282
filePath: $"{_basePath}/_01_Raw/Datasets/iris.csv"
8383
)
@@ -95,9 +95,9 @@ In this example:
9595
// Data/_04_Feature/Catalog.Feature.cs
9696
public partial class Catalog
9797
{
98-
public ICatalogEntry<IEnumerable<IrisFeatureSchema>> IrisFeatures =>
98+
public IItem<IEnumerable<IrisFeatureSchema>> IrisFeatures =>
9999
GetOrCreateEntry(() =>
100-
CatalogEntries.Enumerable.Parquet<IrisFeatureSchema>(
100+
ItemFactory.Enumerable.Parquet<IrisFeatureSchema>(
101101
label: "IrisFeatures",
102102
filePath: $"{_basePath}/_04_Feature/Datasets/iris_features.parquet"
103103
)
@@ -169,13 +169,13 @@ Pipelines are made up of **nodes** — transformation functions that are built t
169169

170170
1. When writing a node, you don't need to worry about how it's connecting to other nodes — you just need to make sure it inputs the schema, and outputs the schema
171171

172-
### Nodes
172+
### Steps
173173

174-
Nodes are simply functions. Easy! The **only** purpose of a node is to take in data that has one schema, and convert it to data that has another schema.
174+
Steps are simply functions. Easy! The **only** purpose of a node is to take in data that has one schema, and convert it to data that has another schema.
175175

176176
```csharp
177-
// Pipelines/DataEngineering/Nodes/SplitAndEncodeNode.cs
178-
public static class SplitAndEncodeNode
177+
// Pipelines/DataEngineering/Steps/SplitAndEncodeStep.cs
178+
public static class SplitAndEncodeStep
179179
{
180180
public static Func<
181181
IEnumerable<IrisRawSchema>, // Input Schema
@@ -194,8 +194,8 @@ public static class SplitAndEncodeNode
194194

195195
Key points:
196196

197-
- Nodes are a *contract*: that data for the node will **always** come in as the input schemas, and **always** come out as the output schemas.
198-
- Nodes can have any number of inputs, and any number of outputs — as long as they're defined in the input and output schemas, you're not limited to just one-in, one-out.
197+
- Steps are a *contract*: that data for the node will **always** come in as the input schemas, and **always** come out as the output schemas.
198+
- Steps can have any number of inputs, and any number of outputs — as long as they're defined in the input and output schemas, you're not limited to just one-in, one-out.
199199

200200

201201
### Pipelines
@@ -214,11 +214,11 @@ public static class DataEngineeringPipeline
214214
{
215215
public static Pipeline Create(Catalog catalog, Params parameters)
216216
{
217-
return PipelineBuilder.CreatePipeline(pipeline =>
217+
return FlowBuilder.CreateFlow(pipeline =>
218218
{
219-
pipeline.AddNode(
219+
pipeline.AddStep(
220220
label: "SplitAndEncode", // Unique label for this node in the pipeline
221-
transform: SplitAndEncodeNode.Create(),
221+
transform: SplitAndEncodeStep.Create(),
222222
input: catalog.IrisRaw,
223223
output: catalog.IrisFeatures
224224
);
@@ -229,7 +229,7 @@ public static class DataEngineeringPipeline
229229

230230
Key points:
231231

232-
- **Nodes are never directly connected to each other**: Nodes always take in Catalog entries, and output Catalog entries — *never* directly to each other.
232+
- **Steps are never directly connected to each other**: Steps always take in Catalog entries, and output Catalog entries — *never* directly to each other.
233233
- **Order doesn't matter.** A node in the pipeline is **only** ever concerned about its input data and output data. Flowthru handles the order when you run the pipeline: as long as the data is available, or generated by another node, your pipeline will run.
234234

235235
## Entry Point
@@ -241,7 +241,7 @@ private static void ConfigureServices(IServiceCollection services, string basePa
241241
{
242242
services.AddFlowthru(flowthru =>
243243
{
244-
flowthru.UseCatalog(_ => new Catalog(basePath: "Data")));
244+
flowthru.RegisterCatalog(_ => new Catalog(basePath: "Data")));
245245

246246
flowthru.RegisterPipeline<Catalog>(
247247
label: "DataEngineering",
@@ -256,4 +256,4 @@ private static void ConfigureServices(IServiceCollection services, string basePa
256256
}
257257
```
258258

259-
And that's it! This finishes the pipeline anatomy. At this point, you have Catalog entries, connected with Nodes in your Pipelines — everything you need to find new and creative ways to organize, analyze, and report on your data!
259+
And that's it! This finishes the pipeline anatomy. At this point, you have Catalog entries, connected with Steps in your Pipelines — everything you need to find new and creative ways to organize, analyze, and report on your data!

docs/guides/advanced/container-deployment.md

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ Deploy a Flowthru pipeline as a standalone container image for execution in envi
1313
A containerized pipeline replaces `FlowthruCli` with a minimal `Program.cs` that owns its own DI container. The key difference: no CLI argument parsing, no filesystem-based configuration.
1414

1515
```csharp
16-
using Flowthru.Pipelines;
16+
using Flowthru.Flows;
1717
using Flowthru.Services;
1818
using Microsoft.Extensions.DependencyInjection;
1919
using Microsoft.Extensions.Logging;
@@ -26,7 +26,7 @@ services.AddLogging(logging =>
2626
services.AddFlowthru(flowthru =>
2727
{
2828
flowthru
29-
.UseCatalog<MyCatalog>()
29+
.RegisterCatalog<MyCatalog>()
3030
.RegisterPipeline<MyCatalog>("Ingest", catalog => IngestPipeline.Create(catalog))
3131
.RegisterPipeline<MyCatalog>("Transform", catalog => TransformPipeline.Create(catalog));
3232

@@ -50,19 +50,19 @@ return result.Success ? 0 : 1;
5050

5151
// --- helpers ---
5252
53-
static PipelineSliceStrategy BuildSliceStrategy()
53+
static FlowSliceStrategy BuildSliceStrategy()
5454
{
5555
static HashSet<string>? ParseCsv(string? value) =>
5656
string.IsNullOrWhiteSpace(value) ? null : new(value.Split(',', StringSplitOptions.RemoveEmptyEntries));
5757

58-
return new PipelineSliceStrategy
58+
return new FlowSliceStrategy
5959
{
6060
Pipelines = ParseCsv(Environment.GetEnvironmentVariable("FLOWTHRU_PIPELINES")),
61-
FromNodes = ParseCsv(Environment.GetEnvironmentVariable("FLOWTHRU_FROM_NODES")),
62-
ToNodes = ParseCsv(Environment.GetEnvironmentVariable("FLOWTHRU_TO_NODES")),
61+
FromSteps = ParseCsv(Environment.GetEnvironmentVariable("FLOWTHRU_FROM_NODES")),
62+
ToSteps = ParseCsv(Environment.GetEnvironmentVariable("FLOWTHRU_TO_NODES")),
6363
FromData = ParseCsv(Environment.GetEnvironmentVariable("FLOWTHRU_FROM_DATA")),
6464
ToData = ParseCsv(Environment.GetEnvironmentVariable("FLOWTHRU_TO_DATA")),
65-
OnlyNodes = ParseCsv(Environment.GetEnvironmentVariable("FLOWTHRU_ONLY_NODES")),
65+
OnlySteps = ParseCsv(Environment.GetEnvironmentVariable("FLOWTHRU_ONLY_NODES")),
6666
};
6767
}
6868
```
@@ -87,7 +87,7 @@ services.AddFlowthru(flowthru =>
8787
{
8888
// No UseConfiguration() call — the IConfiguration above is already registered
8989
flowthru
90-
.UseCatalog<MyCatalog>()
90+
.RegisterCatalog<MyCatalog>()
9191
.RegisterPipeline<MyCatalog>("Ingest", catalog => IngestPipeline.Create(catalog));
9292
});
9393
```
@@ -152,7 +152,7 @@ docker run \
152152

153153
## Inspecting Results
154154

155-
`ExecutePipelineAsync` returns a `PipelineResult` — a structured value object with `Success`, `ExecutionTime`, per-node `NodeResults` (including individual timing, I/O counts, and exceptions), and an optional top-level `Exception`. Serialize it to structured output for your runtime's observability:
155+
`ExecutePipelineAsync` returns a `FlowResult` — a structured value object with `Success`, `ExecutionTime`, per-node `StepResults` (including individual timing, I/O counts, and exceptions), and an optional top-level `Exception`. Serialize it to structured output for your runtime's observability:
156156

157157
```csharp
158158
var result = await flowthru.ExecutePipelineAsync(options, cancellationToken: cts.Token);
@@ -163,9 +163,9 @@ if (!result.Success)
163163
result.ExecutionTime.TotalSeconds,
164164
result.Exception?.Message);
165165

166-
foreach (var (name, node) in result.NodeResults.Where(n => !n.Value.Success))
166+
foreach (var (name, node) in result.StepResults.Where(n => !n.Value.Success))
167167
{
168-
logger.LogError(" Node {Node} failed: {Error}", name, node.Exception?.Message);
168+
logger.LogError(" Step {Step} failed: {Error}", name, node.Exception?.Message);
169169
}
170170
}
171171
```

docs/guides/advanced/metadata-providers.md

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ public class DashboardMetadataProvider : IMetadataProvider
4040
{
4141
var payload = new
4242
{
43-
PipelineName = dag.PipelineName,
44-
NodeCount = dag.Nodes.Count,
43+
FlowName = dag.FlowName,
44+
StepCount = dag.Steps.Count,
4545
EdgeCount = dag.Edges.Count,
4646
Timestamp = DateTime.UtcNow
4747
};
@@ -85,14 +85,14 @@ using Flowthru.Meta.Providers;
8585

8686
services.AddFlowthru(flowthru =>
8787
{
88-
flowthru.UseCatalog(_ => new MyCatalog());
89-
flowthru.UsePipelines(_ => myPipelines);
88+
flowthru.RegisterCatalog(_ => new MyCatalog());
89+
flowthru.RegisterPipelines(_ => myPipelines);
9090

9191
flowthru.ConfigureMetadata(meta =>
9292
{
9393
meta.AddProvider<JsonMetadataProvider, JsonMetadataProviderBuilder>(json => json
9494
.WithOutputDirectory("metadata")
95-
.WithFilenameTemplate("dag-{PipelineName}-{Timestamp}")
95+
.WithFilenameTemplate("dag-{FlowName}-{Timestamp}")
9696
.WithTimestamp("yyyyMMdd-HHmmss")
9797
.UseCompactFormat());
9898

@@ -143,9 +143,9 @@ var singlePipelineDag = service.GetDagMetadata(pipelineName: "DataEngineering");
143143

144144
// Get DAG with slicing applied
145145
var slicedDag = service.GetDagMetadata(
146-
sliceStrategy: new PipelineSliceStrategy
146+
sliceStrategy: new FlowSliceStrategy
147147
{
148-
ToNodes = new HashSet<string> { "TransformNode" }
148+
ToSteps = new HashSet<string> { "TransformStep" }
149149
}
150150
);
151151
```
@@ -159,7 +159,7 @@ Useful for tooling, tests, or debugging pipeline structure before execution.
159159
```csharp
160160
meta.AddProvider<JsonMetadataProvider, JsonMetadataProviderBuilder>(json => json
161161
.WithOutputDirectory("metadata")
162-
.WithFilenameTemplate("dag-{PipelineName}-{Timestamp}")
162+
.WithFilenameTemplate("dag-{FlowName}-{Timestamp}")
163163
.WithTimestamp("yyyyMMdd-HHmmss")
164164
.UseCompactFormat());
165165
```
@@ -170,6 +170,6 @@ meta.AddProvider<JsonMetadataProvider, JsonMetadataProviderBuilder>(json => json
170170
meta.AddProvider<MermaidMetadataProvider, MermaidMetadataProviderBuilder>(mermaid => mermaid
171171
.WithOutputDirectory("metadata")
172172
.WithDirection(MermaidFlowchartDirection.LeftToRight)
173-
.WithActiveNodeColor("#90EE90")
173+
.WithActiveStepColor("#90EE90")
174174
.WithActiveDataColor("#ADD8E6"));
175175
```

0 commit comments

Comments
 (0)