From 0ca5783bc821c895e0f102ce44afbadd0a3e1c42 Mon Sep 17 00:00:00 2001 From: pavantc Date: Thu, 16 Apr 2026 17:44:49 -0700 Subject: [PATCH 1/3] Fix race condition: atomic forwarding table write and zero-entry validation Use mkstemp + rename for atomic binary forwarding table writes in glb-director-cli to prevent glb-director-xdp from reading a partially written file during reload. Add defensive checks in check_config() to reject forwarding tables with 0 tables, 0 backends, or 0 binds, catching corrupt or incomplete configurations early. Includes C tests for check_config validation (14 cases) and Python tests for atomic write behavior (3 cases). --- src/glb-director/cli/Makefile | 15 +- src/glb-director/cli/main.c | 49 ++++- src/glb-director/glb_fwd_config.c | 22 +++ src/glb-director/tests/config_check.sh | 7 + src/glb-director/tests/test_check_config.c | 220 +++++++++++++++++++++ src/glb-director/tests/test_cli_tool.py | 103 +++++++++- 6 files changed, 409 insertions(+), 7 deletions(-) create mode 100644 src/glb-director/tests/test_check_config.c diff --git a/src/glb-director/cli/Makefile b/src/glb-director/cli/Makefile index d584c639..8d5f6e6d 100644 --- a/src/glb-director/cli/Makefile +++ b/src/glb-director/cli/Makefile @@ -28,7 +28,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -all: glb-director-cli glb-config-check glb-director-pcap glb-director-stub-server +all: glb-director-cli glb-config-check glb-director-pcap glb-director-stub-server test-check-config CHECK_SRCS = config_check.c \ ../glb_control_loop.c \ @@ -117,5 +117,16 @@ glb-director-stub-server: -DPCAP_MODE $(LDFLAGS)\ -m64 -mssse3 +test-check-config: ../tests/test_check_config.c ../glb_fwd_config.c ../siphash24.c + gcc \ + $(CFLAGS) \ + -DNO_DPDK \ + -I`pwd`/.. \ + -I`pwd`/../.. \ + ../tests/test_check_config.c \ + ../glb_fwd_config.c \ + ../siphash24.c $(LDFLAGS)\ + -o test-check-config + clean: - rm -rf glb-director-cli glb-config-check glb-director-pcap glb-director-stub-server + rm -rf glb-director-cli glb-config-check glb-director-pcap glb-director-stub-server test-check-config diff --git a/src/glb-director/cli/main.c b/src/glb-director/cli/main.c index dea70793..8731ab9c 100644 --- a/src/glb-director/cli/main.c +++ b/src/glb-director/cli/main.c @@ -33,8 +33,12 @@ #include #include #include +#include +#include #include +#include #include +#include #include "log.h" @@ -121,6 +125,15 @@ int sortable_backend_cmp(const void *a_, const void *b_) return 0; } +/* Temp file path for atomic write; cleaned up on abnormal exit via atexit */ +static char tmp_path[PATH_MAX]; + +static void cleanup_tmp_file(void) +{ + if (tmp_path[0] != '\0') + unlink(tmp_path); +} + void usage() { glb_log_error( @@ -163,9 +176,31 @@ int main(int argc, char *argv[]) return 1; } - FILE *out = fopen(dst_binary, "wb"); + /* + * Write to a temporary file in the same directory, then atomically + * rename to the final path. This avoids races where a reader (e.g. + * glb-director-xdp) could see a partially-written file. + */ + char *dst_copy = strdup(dst_binary); + const char *dst_dir = dirname(dst_copy); + snprintf(tmp_path, sizeof(tmp_path), "%s/.glb-table-XXXXXX", dst_dir); + free(dst_copy); + + int tmp_fd = mkstemp(tmp_path); + if (tmp_fd < 0) { + glb_log_error("Could not create temporary file for writing."); + tmp_path[0] = '\0'; + return 1; + } + + atexit(cleanup_tmp_file); + + FILE *out = fdopen(tmp_fd, "wb"); if (out == NULL) { - glb_log_error("Could not open destination file for writing."); + glb_log_error("Could not open temporary file for writing."); + close(tmp_fd); + unlink(tmp_path); + tmp_path[0] = '\0'; return 1; } @@ -519,5 +554,15 @@ int main(int argc, char *argv[]) fclose(out); + if (rename(tmp_path, dst_binary) != 0) { + glb_log_error("Failed to rename temporary file to destination."); + unlink(tmp_path); + tmp_path[0] = '\0'; + return 1; + } + + /* Rename succeeded; clear tmp_path so atexit handler is a no-op */ + tmp_path[0] = '\0'; + return 0; } diff --git a/src/glb-director/glb_fwd_config.c b/src/glb-director/glb_fwd_config.c index 67f3c892..d530ac46 100644 --- a/src/glb-director/glb_fwd_config.c +++ b/src/glb-director/glb_fwd_config.c @@ -297,10 +297,32 @@ int check_config(struct glb_fwd_config_ctx *ctx) return 1; } + if (ctx->raw_config->num_tables == 0) { + glb_log_error( + "glb-config loading failed: forwarding table has 0 tables"); + return 1; + } + for (i = 0; i < ctx->raw_config->num_tables; i++) { struct glb_fwd_config_content_table *table = &ctx->raw_config->tables[i]; + if (table->num_backends == 0) { + glb_log_error( + "glb-config loading failed: table %d has 0 " + "backends", + i); + return 1; + } + + if (table->num_binds == 0) { + glb_log_error( + "glb-config loading failed: table %d has 0 " + "binds", + i); + return 1; + } + if (table->num_binds > GLB_FMT_MAX_NUM_BINDS) { glb_log_error( "glb-config loading failed: too many binds: %d", diff --git a/src/glb-director/tests/config_check.sh b/src/glb-director/tests/config_check.sh index 2a24efc0..571584fb 100644 --- a/src/glb-director/tests/config_check.sh +++ b/src/glb-director/tests/config_check.sh @@ -76,3 +76,10 @@ begin_test "no errors" grep -iv 'failed' $BASEDIR/build/check.out ) end_test + +begin_test "check_config rejects corrupt tables (0 backends, 0 binds, 0 tables)" +( + $BASEDIR/cli/test-check-config +) +end_test + diff --git a/src/glb-director/tests/test_check_config.c b/src/glb-director/tests/test_check_config.c new file mode 100644 index 00000000..1e74266d --- /dev/null +++ b/src/glb-director/tests/test_check_config.c @@ -0,0 +1,220 @@ +/* + * BSD 3-Clause License + * + * Copyright (c) 2018 GitHub. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Unit tests for check_config() validation in glb_fwd_config.c. + * Compiled with NO_DPDK to avoid DPDK dependency. + * + * Tests that the forwarding table validator correctly rejects tables + * with 0 backends, 0 binds, or 0 tables. + */ + +#include +#include +#include +#include +#include + +#define NO_DPDK 1 +#include "glb_fwd_config.h" + +/* Minimal stubs to satisfy linker */ +bool debug = false; + +int tests_run = 0; +int tests_failed = 0; + +#define ASSERT(cond, msg) \ + do { \ + tests_run++; \ + if (!(cond)) { \ + fprintf(stderr, "FAIL: %s (line %d): %s\n", msg, \ + __LINE__, #cond); \ + tests_failed++; \ + } else { \ + fprintf(stdout, "PASS: %s\n", msg); \ + } \ + } while (0) + +/* + * Build a minimal valid forwarding table in memory. + * Caller must free the returned ctx->raw_config and ctx. + */ +static struct glb_fwd_config_ctx * +build_config(uint32_t num_tables, uint32_t *num_backends_per_table, + uint32_t *num_binds_per_table) +{ + uint64_t size = sizeof(struct glb_fwd_config_content) + + (sizeof(struct glb_fwd_config_content_table) * + num_tables); + + struct glb_fwd_config_content *content = calloc(1, size); + if (content == NULL) + return NULL; + + content->magic_word = GLB_FMT_MAGIC_WORD; + content->version = GLB_FMT_VERSION; + content->num_tables = num_tables; + content->table_entries = GLB_FMT_TABLE_ENTRIES; + content->max_num_backends = GLB_FMT_MAX_NUM_BACKENDS; + content->max_num_binds = GLB_FMT_MAX_NUM_BINDS; + + for (uint32_t i = 0; i < num_tables; i++) { + struct glb_fwd_config_content_table *table = &content->tables[i]; + table->num_backends = num_backends_per_table[i]; + table->num_binds = num_binds_per_table[i]; + + /* Fill in minimal valid backend/bind entries */ + for (uint32_t b = 0; b < table->num_backends; b++) { + table->backends[b].family = FAMILY_IPV4; + table->backends[b].ipv4_addr = htonl(0x01020300 + b); + table->backends[b].state = GLB_BACKEND_STATE_ACTIVE; + table->backends[b].healthy = GLB_BACKEND_HEALTH_UP; + } + for (uint32_t b = 0; b < table->num_binds; b++) { + table->binds[b].family = FAMILY_IPV4; + table->binds[b].ipv4_addr = htonl(0x01010100 + b); + table->binds[b].port_start = 80; + table->binds[b].port_end = 80; + table->binds[b].proto = SUPPORTED_PROTOS_TCP; + } + } + + struct glb_fwd_config_ctx *ctx = + calloc(1, sizeof(struct glb_fwd_config_ctx)); + if (ctx == NULL) { + free(content); + return NULL; + } + + ctx->raw_config = content; + ctx->raw_config_size = size; + ctx->_ref_count = 1; + + return ctx; +} + +static void free_config(struct glb_fwd_config_ctx *ctx) +{ + if (ctx != NULL) { + free(ctx->raw_config); + free(ctx); + } +} + +static void test_valid_config(void) +{ + uint32_t backends[] = {3}; + uint32_t binds[] = {2}; + struct glb_fwd_config_ctx *ctx = build_config(1, backends, binds); + ASSERT(ctx != NULL, "build valid config"); + ASSERT(check_config(ctx) == 0, "valid config passes check_config"); + free_config(ctx); +} + +static void test_zero_tables(void) +{ + struct glb_fwd_config_ctx *ctx = build_config(0, NULL, NULL); + ASSERT(ctx != NULL, "build 0-tables config"); + ASSERT(check_config(ctx) != 0, + "config with 0 tables is rejected by check_config"); + free_config(ctx); +} + +static void test_zero_backends(void) +{ + uint32_t backends[] = {0}; + uint32_t binds[] = {2}; + struct glb_fwd_config_ctx *ctx = build_config(1, backends, binds); + ASSERT(ctx != NULL, "build 0-backends config"); + ASSERT(check_config(ctx) != 0, + "config with 0 backends is rejected by check_config"); + free_config(ctx); +} + +static void test_zero_binds(void) +{ + uint32_t backends[] = {3}; + uint32_t binds[] = {0}; + struct glb_fwd_config_ctx *ctx = build_config(1, backends, binds); + ASSERT(ctx != NULL, "build 0-binds config"); + ASSERT(check_config(ctx) != 0, + "config with 0 binds is rejected by check_config"); + free_config(ctx); +} + +static void test_zero_backends_second_table(void) +{ + uint32_t backends[] = {3, 0}; + uint32_t binds[] = {2, 2}; + struct glb_fwd_config_ctx *ctx = build_config(2, backends, binds); + ASSERT(ctx != NULL, "build config with 0 backends in second table"); + ASSERT(check_config(ctx) != 0, + "config with 0 backends in second table is rejected"); + free_config(ctx); +} + +static void test_zero_binds_second_table(void) +{ + uint32_t backends[] = {3, 3}; + uint32_t binds[] = {2, 0}; + struct glb_fwd_config_ctx *ctx = build_config(2, backends, binds); + ASSERT(ctx != NULL, "build config with 0 binds in second table"); + ASSERT(check_config(ctx) != 0, + "config with 0 binds in second table is rejected"); + free_config(ctx); +} + +static void test_multiple_valid_tables(void) +{ + uint32_t backends[] = {3, 4}; + uint32_t binds[] = {2, 3}; + struct glb_fwd_config_ctx *ctx = build_config(2, backends, binds); + ASSERT(ctx != NULL, "build multi-table valid config"); + ASSERT(check_config(ctx) == 0, + "valid multi-table config passes check_config"); + free_config(ctx); +} + +int main(void) +{ + test_valid_config(); + test_zero_tables(); + test_zero_backends(); + test_zero_binds(); + test_zero_backends_second_table(); + test_zero_binds_second_table(); + test_multiple_valid_tables(); + + printf("\n%d/%d tests passed\n", tests_run - tests_failed, tests_run); + return tests_failed > 0 ? 1 : 0; +} diff --git a/src/glb-director/tests/test_cli_tool.py b/src/glb-director/tests/test_cli_tool.py index 1695e9a0..cb3b8abf 100644 --- a/src/glb-director/tests/test_cli_tool.py +++ b/src/glb-director/tests/test_cli_tool.py @@ -17,7 +17,8 @@ from rendezvous_table import GLBRendezvousTable from nose.tools import assert_equals -import json, subprocess, struct, socket +import glob +import json, subprocess, struct, socket, os, tempfile class TestGLBBinaryCLI(): def get_example_config(self): @@ -57,9 +58,8 @@ def get_example_config(self): def write_example_config(self): config = self.get_example_config() - with open('tests/test-config.json', 'wb') as f: + with open('tests/test-config.json', 'w') as f: f.write(json.dumps(config, indent=4)) - f.close() def get_example_table_reference_implementation(self, table_index): table_config = self.get_example_config()['tables'][table_index] @@ -153,3 +153,100 @@ def test_generate_configs(self): # table = GLBRendezvousTable(forwarding_table_seed) # assert_equals(table.calculate_forwarding_table_row_seed(0x0000).encode('hex'), '491c53a72df4c837') # assert_equals(table.calculate_forwarding_table_row_seed(0xffff).encode('hex'), 'f223c0cc65161620') + + def test_atomic_write_no_temp_file_remains(self): + """Verify that no temporary file is left behind after a successful build.""" + self.write_example_config() + output_dir = 'tests' + output_file = os.path.join(output_dir, 'test-config.bin') + + # Remove output file if it exists + if os.path.exists(output_file): + os.unlink(output_file) + + subprocess.check_call([ + 'cli/glb-director-cli', 'build-config', + 'tests/test-config.json', output_file + ]) + + # The output file should exist + assert os.path.exists(output_file), "Output file should exist after build" + + # No temp files should remain in the output directory + tmp_files = glob.glob(os.path.join(output_dir, '.glb-table-*')) + assert_equals(len(tmp_files), 0, + "No temporary files should remain after successful build") + + def test_atomic_write_preserves_existing_on_failure(self): + """Verify that an existing valid file is not corrupted by a failed build.""" + self.write_example_config() + output_file = 'tests/test-config.bin' + + # First, create a valid binary + subprocess.check_call([ + 'cli/glb-director-cli', 'build-config', + 'tests/test-config.json', output_file + ]) + + # Record the content of the valid file + with open(output_file, 'rb') as f: + valid_content = f.read() + + # Now write a bad JSON config (missing backends) + bad_config = { + "tables": [{ + "hash_key": "12345678901234561234567890123456", + "seed": "34567890123456783456789012345678", + "binds": [ + { "ip": "1.1.1.1", "proto": "tcp", "port": 80 } + ] + # missing "backends" key + }] + } + with open('tests/test-bad-config.json', 'w') as f: + f.write(json.dumps(bad_config, indent=4)) + result = subprocess.call([ + 'cli/glb-director-cli', 'build-config', + 'tests/test-bad-config.json', output_file + ]) + assert result != 0, "Build should fail with bad config" + + # The original file should be unchanged + with open(output_file, 'rb') as f: + assert_equals(f.read(), valid_content, + "Original file should not be modified on failure") + + # Clean up + os.unlink('tests/test-bad-config.json') + + def test_atomic_write_cleans_temp_on_failure(self): + """Verify that temp files are cleaned up when the build fails.""" + output_dir = 'tests' + + bad_config = { + "tables": [{ + "hash_key": "12345678901234561234567890123456", + "seed": "34567890123456783456789012345678", + "binds": [ + { "ip": "1.1.1.1", "proto": "tcp", "port": 80 } + ] + # missing "backends" key + }] + } + with open('tests/test-bad-config.json', 'w') as f: + f.write(json.dumps(bad_config, indent=4)) + + subprocess.call([ + 'cli/glb-director-cli', 'build-config', + 'tests/test-bad-config.json', 'tests/test-bad-output.bin' + ]) + + # No temp files should remain + tmp_files = glob.glob(os.path.join(output_dir, '.glb-table-*')) + assert_equals(len(tmp_files), 0, + "No temporary files should remain after failed build") + + # Clean up + os.unlink('tests/test-bad-config.json') + if os.path.exists('tests/test-bad-output.bin'): + os.unlink('tests/test-bad-output.bin') From eefac3dca9f6b74c40955517352c71723b6f031d Mon Sep 17 00:00:00 2001 From: pavantc Date: Thu, 16 Apr 2026 18:44:27 -0700 Subject: [PATCH 2/3] Fix mkstemp file permissions and add strdup NULL check Add fchmod after mkstemp to match fopen("wb") permissions (0666 & ~umask) instead of the default 0600, so readers under different users can access the forwarding table file. Add NULL check on strdup(dst_binary) to handle OOM gracefully. --- src/glb-director/cli/main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/glb-director/cli/main.c b/src/glb-director/cli/main.c index 8731ab9c..b6d539e4 100644 --- a/src/glb-director/cli/main.c +++ b/src/glb-director/cli/main.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include "log.h" @@ -182,6 +183,10 @@ int main(int argc, char *argv[]) * glb-director-xdp) could see a partially-written file. */ char *dst_copy = strdup(dst_binary); + if (dst_copy == NULL) { + glb_log_error("Out of memory allocating path for temporary file."); + return 1; + } const char *dst_dir = dirname(dst_copy); snprintf(tmp_path, sizeof(tmp_path), "%s/.glb-table-XXXXXX", dst_dir); free(dst_copy); @@ -193,6 +198,11 @@ int main(int argc, char *argv[]) return 1; } + /* mkstemp creates with 0600; match fopen("wb") behavior (0666 & ~umask) */ + mode_t old_umask = umask(0); + umask(old_umask); + fchmod(tmp_fd, 0666 & ~old_umask); + atexit(cleanup_tmp_file); FILE *out = fdopen(tmp_fd, "wb"); From 1d4c1af42c684f639ffe337428fca8925b81f705 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Apr 2026 02:14:17 +0000 Subject: [PATCH 3/3] Fix flaky focal package build apt sources and retries Agent-Logs-Url: https://github.com/github/glb-director/sessions/01788dc8-9e6c-4215-903b-c5660861e395 Co-authored-by: pavantc <625877+pavantc@users.noreply.github.com> --- script/Dockerfile.focal | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/script/Dockerfile.focal b/script/Dockerfile.focal index 20fb5dbb..2208dbb5 100644 --- a/script/Dockerfile.focal +++ b/script/Dockerfile.focal @@ -1,9 +1,11 @@ FROM ubuntu:focal +RUN echo 'Acquire::Retries "10";' > /etc/apt/apt.conf.d/80-retries + RUN apt-get update && apt-get -y install curl git # DPDK -RUN echo "deb http://dk.archive.ubuntu.com/ubuntu/ bionic main universe" >> /etc/apt/sources.list +RUN echo "deb http://archive.ubuntu.com/ubuntu/ bionic main universe" >> /etc/apt/sources.list ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y build-essential dpdk=17.11.1-6 dpdk-dev=17.11.1-6 libdpdk-dev=17.11.1-6 wget pkg-config libjansson-dev libsystemd-dev