Compare commits

...

10 Commits

Author SHA1 Message Date
Joseph Doherty 9bdb899774 fix(clients): inline Go gosec directive and strip IPv6 brackets in Python authority split 2026-06-01 07:57:22 -04:00
Joseph Doherty e5c704de69 feat(gateway): add machine FQDN to self-signed cert SANs
Best-effort resolve the host FQDN via Dns.GetHostEntry and add it as a
DNS SAN when it differs (OrdinalIgnoreCase) from the short machine name
and "localhost". SocketException / ArgumentException are caught and
silently skipped so cert generation remains robust when DNS is absent.
2026-06-01 07:52:48 -04:00
Joseph Doherty 4e520f9c0c fix(gateway): delete temp cert file on persist failure
Wrap the WriteAllBytes/Move/HardenPermissions sequence in a try/catch so
that any failure best-effort deletes the hardened .tmp file (which may
already hold PFX/private-key bytes) before rethrowing.  Add a test that
induces a persist failure by pointing SelfSignedCertPath inside a
regular file and asserts no .tmp is left on disk.
2026-06-01 07:45:15 -04:00
Joseph Doherty 2eb81379e4 docs: TLS auto-cert and lenient client trust 2026-06-01 07:43:13 -04:00
Joseph Doherty ddd5721082 fix(gateway): harden self-signed cert persistence and config validation 2026-06-01 07:37:27 -04:00
Joseph Doherty 3775f6bf3b feat(gateway): supply generated cert as Kestrel HTTPS default 2026-06-01 07:30:26 -04:00
Joseph Doherty cdfad420bb fix(client-rust): apply TLS guard to GalaxyClient and add CLI strict flag
Extract the TLS-without-CA guard into a shared `build_tls_config` helper
in options.rs so both GatewayClient and GalaxyClient use identical logic.
GalaxyClient previously had no guard, so TLS-without-CA produced a cryptic
tonic handshake failure; it now returns the same actionable InvalidEndpoint
error. The guard message notes that a server-name override affects SNI but
does not pin trust. Add --require-certificate-validation to ConnectionArgs
in the CLI binary. Add a mirror test for GalaxyClient in tests/tls.rs.
2026-06-01 07:28:16 -04:00
Joseph Doherty 330e665f6b fix(gateway): correct ECDSA key usage and dispose CertificateRequest
Drop KeyEncipherment from the self-signed cert's key-usage extension — it
is semantically wrong for ECDSA (RSA key-transport only); DigitalSignature
alone is correct for TLS 1.3 / ECDHE server certs.  CertificateRequest is
unchanged (not IDisposable in .NET 10).  Test now also asserts MachineName,
127.0.0.1 and IPv6 loopback are present in the SAN extension.
2026-06-01 07:27:15 -04:00
Joseph Doherty 5e01ad9c22 fix(client-dotnet): apply lenient TLS to GalaxyRepositoryClient and enforce hostname on CA-pin
Mirror MxGatewayClient's three-branch handler structure in GalaxyRepositoryClient
(CA-pin / lenient accept-all / OS trust) so the Galaxy endpoint works against the
gateway's self-signed cert under the default lenient posture. Expose an internal
CreateHttpHandlerForTests seam for unit testing. Add RemoteCertificateNameMismatch
rejection at the top of both CA-pinned callbacks so a pinned-CA connection truly
verifies the host. Strengthen existing lenient test to invoke the callback and assert
it returns true; add mirrored Galaxy-client handler tests.
2026-06-01 07:24:07 -04:00
Joseph Doherty 77a9108673 feat(gateway): persist/reuse self-signed cert with hardened permissions 2026-06-01 07:23:33 -04:00
33 changed files with 1022 additions and 69 deletions
+19
View File
@@ -107,6 +107,7 @@ public sealed class MxGatewayClientOptions
public required string ApiKey { get; init; }
public bool UseTls { get; init; }
public string? CaCertificatePath { get; init; }
public bool RequireCertificateValidation { get; init; }
public string? ServerNameOverride { get; init; }
public TimeSpan ConnectTimeout { get; init; } = TimeSpan.FromSeconds(10);
public TimeSpan DefaultCallTimeout { get; init; } = TimeSpan.FromSeconds(30);
@@ -124,6 +125,24 @@ or subscription changes because those calls can partially succeed in MXAccess.
API key may be loaded from `MXGATEWAY_API_KEY` by the CLI, not implicitly by the
library constructor unless a helper explicitly says it does that.
### TLS trust posture
The gateway can serve a self-signed certificate it generates itself (it has no
PKI). To make that usable, TLS is **lenient by default**: when `UseTls` is set
and `CaCertificatePath` is empty, `CreateHttpHandler` installs a
`RemoteCertificateValidationCallback` that returns `true`, so the gateway's
self-signed certificate is accepted without verification.
To verify the gateway instead:
- set `CaCertificatePath` to pin a CA — validated via a `CustomRootTrust`
`X509Chain` against that root, and the callback additionally rejects a
hostname/SAN mismatch (`RemoteCertificateNameMismatch`); or
- set `RequireCertificateValidation` to `true` to keep the default OS/system-trust
verification on a connection with no pinned CA.
Pinning a CA always wins over the lenient default.
## Auth Interceptor
Use a gRPC call credentials/interceptor layer to attach:
+11
View File
@@ -287,6 +287,17 @@ Use TLS options for a secured gateway:
dotnet run --project clients/dotnet/ZB.MOM.WW.MxGateway.Client.Cli -- smoke --endpoint https://ZB.MOM.WW.MxGateway.example.local:5001 --tls --ca-file C:\certs\mxgateway-ca.pem --server-name ZB.MOM.WW.MxGateway.example.local --api-key-env MXGATEWAY_API_KEY --item Area001.Pump001.Speed --json
```
### TLS trust
The gateway can auto-generate its own self-signed certificate (it has no PKI), so
the client is **lenient by default**: a TLS connection (`UseTls` / `--tls`) with
no pinned CA accepts whatever certificate the gateway presents. To verify
instead, pin a CA with `CaCertificatePath` / `--ca-file` (this path also enforces
the certificate hostname/SAN match), or set `RequireCertificateValidation` to
force OS/system-trust verification without pinning. Use `ServerNameOverride` /
`--server-name` when the dialed host differs from the certificate SAN. See
[Gateway Configuration](../../docs/GatewayConfiguration.md#automatic-self-signed-certificate).
## Integration Checks
Run live checks only when a gateway and MXAccess-backed worker are available:
@@ -1,4 +1,5 @@
using System.Net.Http;
using System.Net.Security;
using ZB.MOM.WW.MxGateway.Client;
namespace ZB.MOM.WW.MxGateway.Client.Tests;
@@ -8,6 +9,7 @@ public sealed class MxGatewayClientTlsHandlerTests
/// <summary>
/// Verifies that when TLS is used with no pinned CA and RequireCertificateValidation is false (default),
/// the handler installs an accept-all callback so the gateway's self-signed cert is trusted.
/// The callback must return true regardless of chain errors.
/// </summary>
[Fact]
public void Handler_SkipsVerification_WhenTlsAndNoCaPinned()
@@ -20,6 +22,7 @@ public sealed class MxGatewayClientTlsHandlerTests
};
using SocketsHttpHandler handler = MxGatewayClient.CreateHttpHandlerForTests(options);
Assert.NotNull(handler.SslOptions.RemoteCertificateValidationCallback);
Assert.True(handler.SslOptions.RemoteCertificateValidationCallback!(null!, null!, null, SslPolicyErrors.RemoteCertificateChainErrors));
}
/// <summary>
@@ -40,3 +43,43 @@ public sealed class MxGatewayClientTlsHandlerTests
Assert.Null(handler.SslOptions.RemoteCertificateValidationCallback);
}
}
public sealed class GalaxyRepositoryClientTlsHandlerTests
{
/// <summary>
/// Verifies that when TLS is used with no pinned CA and RequireCertificateValidation is false (default),
/// the Galaxy client handler installs an accept-all callback so the gateway's self-signed cert is trusted.
/// The callback must return true regardless of chain errors.
/// </summary>
[Fact]
public void Handler_SkipsVerification_WhenTlsAndNoCaPinned()
{
MxGatewayClientOptions options = new()
{
Endpoint = new Uri("https://localhost:5120"),
ApiKey = "k",
UseTls = true,
};
using SocketsHttpHandler handler = GalaxyRepositoryClient.CreateHttpHandlerForTests(options);
Assert.NotNull(handler.SslOptions.RemoteCertificateValidationCallback);
Assert.True(handler.SslOptions.RemoteCertificateValidationCallback!(null!, null!, null, SslPolicyErrors.RemoteCertificateChainErrors));
}
/// <summary>
/// Verifies that when RequireCertificateValidation is true, the Galaxy client callback is left null
/// so the OS trust store performs validation.
/// </summary>
[Fact]
public void Handler_KeepsDefaultVerification_WhenRequireCertificateValidation()
{
MxGatewayClientOptions options = new()
{
Endpoint = new Uri("https://localhost:5120"),
ApiKey = "k",
UseTls = true,
RequireCertificateValidation = true,
};
using SocketsHttpHandler handler = GalaxyRepositoryClient.CreateHttpHandlerForTests(options);
Assert.Null(handler.SslOptions.RemoteCertificateValidationCallback);
}
}
@@ -490,7 +490,10 @@ public sealed class GalaxyRepositoryClient : IAsyncDisposable
.ConfigureAwait(false);
}
private static HttpMessageHandler CreateHttpHandler(MxGatewayClientOptions options)
private static HttpMessageHandler CreateHttpHandler(MxGatewayClientOptions options) =>
CreateHttpHandlerForTests(options);
internal static SocketsHttpHandler CreateHttpHandlerForTests(MxGatewayClientOptions options)
{
SocketsHttpHandler handler = new()
{
@@ -510,6 +513,11 @@ public sealed class GalaxyRepositoryClient : IAsyncDisposable
X509Certificate2 trustedRoot = X509CertificateLoader.LoadCertificateFromFile(options.CaCertificatePath);
handler.SslOptions.RemoteCertificateValidationCallback = (_, certificate, chain, errors) =>
{
if ((errors & System.Net.Security.SslPolicyErrors.RemoteCertificateNameMismatch) != 0)
{
return false;
}
if (certificate is null)
{
return false;
@@ -525,6 +533,10 @@ public sealed class GalaxyRepositoryClient : IAsyncDisposable
return customChain.Build(certificateToValidate);
};
}
else if (!options.RequireCertificateValidation)
{
handler.SslOptions.RemoteCertificateValidationCallback = (_, _, _, _) => true;
}
}
return handler;
@@ -338,6 +338,11 @@ public sealed class MxGatewayClient : IAsyncDisposable
X509Certificate2 trustedRoot = X509CertificateLoader.LoadCertificateFromFile(options.CaCertificatePath);
handler.SslOptions.RemoteCertificateValidationCallback = (_, certificate, chain, errors) =>
{
if ((errors & System.Net.Security.SslPolicyErrors.RemoteCertificateNameMismatch) != 0)
{
return false;
}
if (certificate is null)
{
return false;
+17
View File
@@ -104,6 +104,23 @@ Support:
- `credentials.NewClientTLSFromFile`,
- custom `tls.Config` for advanced callers.
### Trust posture
The gateway can serve a self-signed certificate it generates itself (it has no
PKI). To make that usable, TLS is **lenient by default**: when `Plaintext` is
`false` and no `CACertFile`/`TLSConfig`/`TransportCredentials` is supplied,
`buildCredentials` dials with `tls.Config{InsecureSkipVerify: true}` (carrying
`ServerNameOverride` as the SNI when set), so the gateway's self-signed
certificate is accepted without verification.
To verify the gateway instead:
- set `CACertFile` to pin a CA (full verification against that root), or
- set `RequireCertificateValidation: true` to verify against the OS/system trust
roots without pinning.
Pinning a CA always wins over the lenient default.
## Streaming
`Events(ctx)` should return a receive channel of:
+8
View File
@@ -75,6 +75,14 @@ client, err := mxgateway.Dial(ctx, mxgateway.Options{
})
```
The gateway can auto-generate its own self-signed certificate (it has no PKI), so
the client is **lenient by default**: a TLS connection (`Plaintext: false`) with
no `CACertFile`/`TLSConfig` accepts whatever certificate the gateway presents
(`InsecureSkipVerify`, with `ServerNameOverride` as the SNI when set). To verify
instead, set `CACertFile` to pin a CA, or set `RequireCertificateValidation:
true` to verify against the OS/system trust roots without pinning. See
[Gateway Configuration](../../docs/GatewayConfiguration.md#automatic-self-signed-certificate).
`Client.OpenSession` returns a `Session` with helpers for `Register`,
`AddItem`, `AddItem2`, `Advise`, `Write`, `Events`, and `Close`. Prefer
`SubscribeEvents` or `SubscribeEventsAfter` for long-running streams because the
+3 -5
View File
@@ -234,11 +234,9 @@ func tlsConfigForOptions(opts Options) *tls.Config {
return nil
}
return &tls.Config{
MinVersion: tls.VersionTLS12,
ServerName: opts.ServerNameOverride,
//nolint:gosec // internal tool; self-signed cert is the expected gateway default;
// opt-in to strict verification via RequireCertificateValidation.
InsecureSkipVerify: !opts.RequireCertificateValidation,
MinVersion: tls.VersionTLS12,
ServerName: opts.ServerNameOverride,
InsecureSkipVerify: !opts.RequireCertificateValidation, //nolint:gosec // internal tool; self-signed gateway cert expected; opt-in strict via RequireCertificateValidation
}
}
+17
View File
@@ -112,6 +112,23 @@ Support:
- custom CA certificate file,
- server name override for test environments.
### Trust posture
The gateway can serve a self-signed certificate it generates itself (it has no
PKI). To make that usable, TLS is **lenient by default**: when the channel is not
plaintext and no `caCertificatePath` is set, the client builds
`GrpcSslContexts.forClient().trustManager(InsecureTrustManagerFactory.INSTANCE)`
(grpc-netty-shaded), so the gateway's self-signed certificate is accepted without
verification.
To verify the gateway instead:
- set `caCertificatePath` to pin a CA (full verification against that root), or
- set `requireCertificateValidation` to `true` to verify against the JVM trust
store without pinning.
Pinning a CA always wins over the lenient default.
## Streaming
Support both:
+10
View File
@@ -57,6 +57,16 @@ try (MxGatewayClient client = MxGatewayClient.connect(options);
}
```
The gateway can auto-generate its own self-signed certificate (it has no PKI), so
the client is **lenient by default**: a TLS connection (`plaintext(false)`) with
no `caCertificatePath` accepts whatever certificate the gateway presents (via
grpc-netty-shaded's `InsecureTrustManagerFactory`). To verify instead, set
`caCertificatePath` to pin a CA, or set `requireCertificateValidation(true)` to
verify against the JVM trust store without pinning. Use `serverNameOverride` /
`--server-name-override` when the dialed host differs from the certificate SAN.
See
[Gateway Configuration](../../docs/GatewayConfiguration.md#automatic-self-signed-certificate).
Use `rawBlockingStub`, `rawFutureStub`, `rawAsyncStub`, `openSessionRaw`,
`closeSessionRaw`, `invoke`, and raw session helper methods when tests need the
underlying protobuf messages. `MxGatewayCommandException` and
+22
View File
@@ -112,6 +112,28 @@ Support:
- TLS channel with default roots,
- custom root certificate file.
### Trust posture (trust-on-first-use)
The gateway can serve a self-signed certificate it generates itself (it has no
PKI). grpc-python exposes no per-channel skip-verify hook, so the client cannot
"accept any certificate" the way the other clients do. Instead, when the channel
is not plaintext and neither `ca_file` nor `require_certificate_validation` is
set, the TLS default is **trust-on-first-use**: the client fetches the server's
presented certificate once via `ssl.get_server_certificate` (an unverified
probe), pins it as the channel's only trust root, and — because the generated
certificate always carries a `localhost` SAN — defaults
`grpc.ssl_target_name_override` to `localhost` when no `server_name_override` was
supplied (tolerating dial-by-IP or a hostname mismatch). A failed probe is
surfaced as a transport error naming the endpoint.
To verify the gateway instead:
- set `ca_file` to verify against a specific CA, or
- set `require_certificate_validation=True` to verify against the system trust
roots.
Both bypass the TOFU path.
## Streaming
Expose `stream_events` as an async iterator. Canceling the task should cancel
+11
View File
@@ -230,6 +230,17 @@ The client supports plaintext channels for local development, TLS with system
roots, TLS with a custom `ca_file`, and an optional test server name override.
API keys are redacted from option repr output and CLI error output.
The gateway can auto-generate its own self-signed certificate (it has no PKI).
grpc-python has no per-channel skip-verify, so the lenient TLS default is
**trust-on-first-use**: with no `ca_file` and `require_certificate_validation`
left `False`, the client fetches the gateway's presented certificate once
(unverified) and pins it for the channel, defaulting the SNI/target-name override
to `localhost` (the generated certificate always carries a `localhost` SAN) when
none was supplied. To verify instead, pass `ca_file` to verify against a specific
CA, or set `require_certificate_validation=True` to verify against the system
trust roots. See
[Gateway Configuration](../../docs/GatewayConfiguration.md#automatic-self-signed-certificate).
## CLI
The CLI emits deterministic JSON for automation:
@@ -74,8 +74,20 @@ class BrowseChildrenOptions:
def _split_authority(endpoint: str) -> tuple[str, int]:
"""Split a gRPC target (optionally scheme-prefixed) into (host, port)."""
"""Split a gRPC target (optionally scheme-prefixed) into (host, port).
Handles bracketed IPv6 literals (e.g. ``[::1]:5120`` or bare ``[::1]``),
returning the host without brackets so it is safe to pass to
``ssl.get_server_certificate``.
"""
target = endpoint.split("://", 1)[-1]
if target.startswith("["):
# Bracketed IPv6: "[::1]:5120" or "[::1]"
bracket_end = target.find("]")
host = target[1:bracket_end] # strip surrounding brackets
remainder = target[bracket_end + 1 :] # ":5120" or ""
port_str = remainder.lstrip(":")
return (host, int(port_str) if port_str else 443)
host, _, port = target.rpartition(":")
return (host or "localhost", int(port) if port else 443)
+11
View File
@@ -134,6 +134,17 @@ def test_split_authority_parses_host_and_port() -> None:
assert _split_authority(":5120") == ("localhost", 5120)
def test_split_authority_strips_ipv6_brackets() -> None:
from zb_mom_ww_mxgateway.options import _split_authority
# Bracketed IPv6 with port — brackets must be removed for ssl.get_server_certificate
assert _split_authority("[::1]:5120") == ("::1", 5120)
# Bare bracketed IPv6 (no port) — default port 443
assert _split_authority("[::1]") == ("::1", 443)
# Scheme-prefixed bracketed IPv6
assert _split_authority("grpc://[::1]:5120") == ("::1", 5120)
def test_tofu_connect_failure_raises_transport_error() -> None:
"""A failed cert pre-fetch surfaces the client's transport error type."""
options = ClientOptions(endpoint=f"127.0.0.1:{_free_port()}")
+13
View File
@@ -76,6 +76,19 @@ types.
cargo run -p mxgw-cli -- smoke --endpoint https://mxgateway.example.local:5001 --tls --ca-file C:\certs\mxgateway-ca.pem --server-name-override mxgateway.example.local --api-key-env MXGATEWAY_API_KEY --item TestChildObject.TestInt --json
```
### TLS trust (pin-only)
The gateway can auto-generate its own self-signed certificate (it has no PKI).
Unlike the other clients, the Rust client is **not** lenient: tonic 0.13.1
exposes no public hook to inject a custom certificate verifier, so TLS over Rust
is pin-only. A TLS connection requires either `--ca-file` /
`ClientOptions::with_ca_file(...)` to pin a CA (export the gateway's self-signed
certificate and pin it), or `--require-certificate-validation` /
`with_require_certificate_validation(true)` to verify against the system trust
roots. TLS with neither set fails `connect` with a clear, actionable error rather
than accepting the certificate. See
[Gateway Configuration](../../docs/GatewayConfiguration.md#automatic-self-signed-certificate).
## Library Surface
`ClientOptions` configures endpoint, API key, plaintext or TLS transport,
+19
View File
@@ -189,6 +189,25 @@ Support:
- custom CA file,
- domain override.
### Trust posture (pin-only)
The gateway can serve a self-signed certificate it generates itself (it has no
PKI). Rust is the **exception** to the lenient-by-default posture the other
clients use: tonic 0.13.1 exposes no public hook to inject a custom certificate
verifier, so the Rust client cannot accept an arbitrary certificate. TLS over the
Rust client is therefore **pin-only** — it requires either:
- `ClientOptions::with_ca_file(...)` to pin a CA (the supported path for the
gateway's self-signed certificate; export the certificate and pin it), or
- `ClientOptions::with_require_certificate_validation(true)` to verify against the
system trust roots.
With TLS enabled (`with_plaintext(false)`), no pinned CA, and certificate
validation not required, `GatewayClient::connect` rejects the connection with a
clear, actionable error pointing at `with_ca_file` /
`require_certificate_validation` rather than silently accepting the certificate.
The CLI exposes `--ca-file` and `--require-certificate-validation`.
## Streaming
Expose event streams as a `Stream<Item = Result<MxEvent, Error>>`. Dropping the
+8
View File
@@ -426,6 +426,11 @@ struct ConnectionArgs {
ca_file: Option<PathBuf>,
#[arg(long)]
server_name_override: Option<String>,
/// Verify the server certificate against the system trust roots even
/// without a pinned CA. The Rust client's default is to require a CA
/// file (see `--ca-file`); set this flag to use system roots instead.
#[arg(long)]
require_certificate_validation: bool,
#[arg(long, default_value_t = 10)]
connect_timeout_seconds: u64,
#[arg(long, default_value_t = 30)]
@@ -453,6 +458,9 @@ impl ConnectionArgs {
if let Some(server_name_override) = &self.server_name_override {
options = options.with_server_name_override(server_name_override);
}
if self.require_certificate_validation {
options = options.with_require_certificate_validation(true);
}
options
}
+3 -36
View File
@@ -6,10 +6,8 @@
//! code should prefer [`GatewayClient::open_session`] and the [`Session`]
//! handle it returns, rather than the `*_raw` methods.
use std::fs;
use tonic::codegen::InterceptedService;
use tonic::transport::{Certificate, Channel, ClientTlsConfig};
use tonic::transport::Channel;
use tonic::Request;
use crate::auth::AuthInterceptor;
@@ -21,7 +19,7 @@ use crate::generated::mxaccess_gateway::v1::{
OpenSessionReply, OpenSessionRequest, QueryActiveAlarmsRequest, StreamAlarmsRequest,
StreamEventsRequest,
};
use crate::options::ClientOptions;
use crate::options::{build_tls_config, ClientOptions};
use crate::session::Session;
/// Generated gateway client wrapped in the auth interceptor that
@@ -78,38 +76,7 @@ impl GatewayClient {
})?;
endpoint = endpoint.connect_timeout(options.connect_timeout());
if !options.plaintext() {
let mut tls = ClientTlsConfig::new();
if let Some(server_name) = options.server_name_override() {
tls = tls.domain_name(server_name.to_owned());
}
if let Some(ca_file) = options.ca_file() {
let certificate = fs::read(ca_file).map_err(|source| Error::InvalidEndpoint {
endpoint: options.endpoint().to_owned(),
detail: format!("failed to read CA file {}: {source}", ca_file.display()),
})?;
tls = tls.ca_certificate(Certificate::from_pem(certificate));
} else if !options.require_certificate_validation() {
// Lenient-default fallback (Rust pin-only exception): tonic
// 0.13's `ClientTlsConfig` builds its rustls verifier inside a
// crate-private connector and exposes no hook for a custom
// `ServerCertVerifier`, so — unlike the other clients — the
// Rust client cannot accept an arbitrary self-signed cert. Pin
// the gateway's CA instead, or opt into strict verification
// against the system trust roots. We reject here rather than
// silently verifying against system roots (which would fail a
// self-signed gateway with a confusing handshake error).
return Err(Error::InvalidEndpoint {
endpoint: options.endpoint().to_owned(),
detail: "TLS requested without a pinned CA. The Rust client cannot accept an \
arbitrary self-signed certificate (tonic 0.13 exposes no custom \
rustls verifier). Pin the gateway certificate with \
ClientOptions::with_ca_file, or call \
ClientOptions::with_require_certificate_validation(true) to verify \
against the system trust roots."
.to_owned(),
});
}
if let Some(tls) = build_tls_config(&options)? {
endpoint = endpoint.tls_config(tls)?;
}
+3 -15
View File
@@ -6,13 +6,12 @@
//! re-exported through [`crate::generated::galaxy_repository::v1`].
use std::collections::HashSet;
use std::fs;
use std::sync::Arc;
use prost_types::Timestamp;
use tokio::sync::Mutex as AsyncMutex;
use tonic::codegen::InterceptedService;
use tonic::transport::{Certificate, Channel, ClientTlsConfig};
use tonic::transport::Channel;
use tonic::Request;
use crate::auth::AuthInterceptor;
@@ -23,7 +22,7 @@ use crate::generated::galaxy_repository::v1::{
DiscoverHierarchyRequest, GalaxyObject, GetLastDeployTimeRequest, TestConnectionRequest,
WatchDeployEventsRequest,
};
use crate::options::ClientOptions;
use crate::options::{build_tls_config, ClientOptions};
const DISCOVER_HIERARCHY_PAGE_SIZE: i32 = 5000;
const BROWSE_CHILDREN_PAGE_SIZE: i32 = 500;
@@ -183,18 +182,7 @@ impl GalaxyClient {
})?;
endpoint = endpoint.connect_timeout(options.connect_timeout());
if !options.plaintext() {
let mut tls = ClientTlsConfig::new();
if let Some(server_name) = options.server_name_override() {
tls = tls.domain_name(server_name.to_owned());
}
if let Some(ca_file) = options.ca_file() {
let certificate = fs::read(ca_file).map_err(|source| Error::InvalidEndpoint {
endpoint: options.endpoint().to_owned(),
detail: format!("failed to read CA file {}: {source}", ca_file.display()),
})?;
tls = tls.ca_certificate(Certificate::from_pem(certificate));
}
if let Some(tls) = build_tls_config(&options)? {
endpoint = endpoint.tls_config(tls)?;
}
+66
View File
@@ -3,10 +3,14 @@
//! chain of `with_*` setters; the `Debug` impl redacts the API key.
use std::fmt;
use std::fs;
use std::path::PathBuf;
use std::time::Duration;
use tonic::transport::{Certificate, ClientTlsConfig};
use crate::auth::ApiKey;
use crate::error::Error;
const DEFAULT_MAX_GRPC_MESSAGE_BYTES: usize = 16 * 1024 * 1024;
@@ -171,6 +175,68 @@ impl ClientOptions {
}
}
/// Build the [`ClientTlsConfig`] for a non-plaintext connection described by
/// `options`, applying the lenient-default guard that is the **Rust
/// pin-only exception**.
///
/// Returns `Ok(None)` when `options.plaintext()` is `true` (no TLS needed).
/// Returns `Ok(Some(tls))` when a valid TLS config can be assembled.
/// Returns `Err(Error::InvalidEndpoint)` when TLS is requested but no pinned
/// CA was provided and `require_certificate_validation` is `false`.
///
/// # Why this guard exists
///
/// `tonic` 0.13's `ClientTlsConfig` builds its rustls verifier inside a
/// crate-private connector and exposes no hook for a custom
/// `ServerCertVerifier`. The Rust client therefore cannot accept an arbitrary
/// self-signed certificate the way the other language clients do. Rather than
/// silently falling back to system-root verification (which always fails
/// against a self-signed gateway certificate), we reject the configuration
/// early with an actionable error.
pub(crate) fn build_tls_config(options: &ClientOptions) -> Result<Option<ClientTlsConfig>, Error> {
if options.plaintext() {
return Ok(None);
}
let mut tls = ClientTlsConfig::new();
if let Some(server_name) = options.server_name_override() {
tls = tls.domain_name(server_name.to_owned());
}
if let Some(ca_file) = options.ca_file() {
let certificate = fs::read(ca_file).map_err(|source| Error::InvalidEndpoint {
endpoint: options.endpoint().to_owned(),
detail: format!("failed to read CA file {}: {source}", ca_file.display()),
})?;
tls = tls.ca_certificate(Certificate::from_pem(certificate));
} else if !options.require_certificate_validation() {
// Lenient-default fallback (Rust pin-only exception): tonic
// 0.13's `ClientTlsConfig` builds its rustls verifier inside a
// crate-private connector and exposes no hook for a custom
// `ServerCertVerifier`, so — unlike the other clients — the
// Rust client cannot accept an arbitrary self-signed cert. Pin
// the gateway's CA instead, or opt into strict verification
// against the system trust roots. We reject here rather than
// silently verifying against system roots (which would fail a
// self-signed gateway with a confusing handshake error).
//
// Note: a server-name override affects SNI (the hostname sent
// in the TLS ClientHello) but does NOT pin trust. Overriding
// the server name alone does not bypass certificate validation.
return Err(Error::InvalidEndpoint {
endpoint: options.endpoint().to_owned(),
detail: "TLS requested without a pinned CA. The Rust client cannot accept an \
arbitrary self-signed certificate (tonic 0.13 exposes no custom \
rustls verifier). Pin the gateway certificate with \
ClientOptions::with_ca_file, or call \
ClientOptions::with_require_certificate_validation(true) to verify \
against the system trust roots. Note: a server-name override \
affects SNI but does not pin trust."
.to_owned(),
});
}
Ok(Some(tls))
}
impl Default for ClientOptions {
fn default() -> Self {
Self::new("http://127.0.0.1:5000")
+35 -1
View File
@@ -10,7 +10,7 @@
use std::time::Duration;
use zb_mom_ww_mxgateway_client::{ClientOptions, Error, GatewayClient};
use zb_mom_ww_mxgateway_client::{ClientOptions, Error, GalaxyClient, GatewayClient};
/// Drive `connect` to its error without requiring `GatewayClient: Debug`
/// (the success arm is dropped explicitly so `unwrap_err` is unnecessary).
@@ -87,6 +87,40 @@ async fn tls_with_ca_file_is_permitted_and_proceeds_past_the_guard() {
);
}
/// Drive `GalaxyClient::connect` to its error (mirrors `connect_err` above).
async fn galaxy_connect_err(options: ClientOptions) -> Error {
match GalaxyClient::connect(options).await {
Ok(_client) => {
panic!("GalaxyClient::connect unexpectedly succeeded against a dead TLS address")
}
Err(error) => error,
}
}
#[tokio::test]
async fn galaxy_tls_without_ca_is_rejected_with_actionable_error_by_default() {
// GalaxyClient::connect must apply the same TLS guard as GatewayClient —
// TLS without a pinned CA (and without require_certificate_validation)
// returns a clear, actionable InvalidEndpoint error.
let options = ClientOptions::new("https://127.0.0.1:1")
.with_plaintext(false)
.with_connect_timeout(Duration::from_millis(200));
let error = galaxy_connect_err(options).await;
let Error::InvalidEndpoint { detail, .. } = error else {
panic!("expected InvalidEndpoint, got {error:?}");
};
assert!(
detail.contains("ca_file") || detail.contains("CA"),
"error should instruct the user to pass a CA file: {detail}"
);
assert!(
detail.contains("require_certificate_validation"),
"error should mention the require_certificate_validation opt-in: {detail}"
);
}
/// A throwaway self-signed CA certificate (PEM). Only needs to parse as a
/// PEM trust root so the CA-pinning path is exercised past the guard.
const SELF_SIGNED_CA_PEM: &str = "-----BEGIN CERTIFICATE-----
+13
View File
@@ -51,6 +51,19 @@ The shared inputs are:
The commands in the matrix use `MXGATEWAY_API_KEY` through each CLI's
`api-key-env` flag. They must not embed bearer tokens or raw API keys.
### TLS variant
The matrix runs over plaintext (`h2c`) by default. A TLS variant exists but stays
a manual/opt-in run, consistent with the gate above, because it needs the gateway
started with an HTTPS endpoint (an `https://` `MXGATEWAY_ENDPOINT`) and each CLI
switched to its TLS flag (`--tls` / `-tls` / `--plaintext=false` /
`plaintext=False`). The clients are lenient by default and accept the gateway's
auto-generated self-signed certificate without extra trust setup, except the Rust
CLI, which is pin-only and needs `--ca-file` or `--require-certificate-validation`
(and Python uses trust-on-first-use). See
[Gateway Configuration — Automatic self-signed certificate](./GatewayConfiguration.md#automatic-self-signed-certificate)
and each client README for the per-client TLS flags.
## JSON Comparison
Every command in the matrix requests JSON output. A runner can compare the
+36
View File
@@ -375,6 +375,42 @@ deployment-heavy box, multiply per-session SQL connections, and complicate the
cold-start path. Wire-side laziness solves the actual pain (oversized gRPC
replies and a heavy DOM) without disturbing the materialization model.
## TLS Auto-Certificate and Lenient Client Trust
Decision: when a Kestrel `https://` endpoint is configured without a certificate
of its own (and no `Kestrel:Certificates:Default` is set), the gateway generates
and persists a self-signed certificate rather than failing to start. Clients
connecting over TLS without a pinned CA accept whatever certificate the server
presents by default; pinning a CA restores full verification.
Rationale: `mxaccessgw` is an internal tool with no PKI to issue or distribute
certificates. The prior behavior — an `https` endpoint with no certificate
fails at startup with Kestrel's opaque "no server certificate was specified"
error — pushed operators toward plaintext (`h2c`), exposing the API key and
request payloads on the wire. Auto-generating a long-lived, persisted, reused
certificate lets TLS "just work" with zero certificate management, while the
lenient client default means clients connect to that self-signed certificate
without a manual trust step. Both choices are deliberate, not oversights:
strict-by-default would force PKI work this tool does not warrant. Plaintext-only
deployments are untouched — no certificate or key material is written for them —
and an operator who supplies a real certificate transparently overrides the
generated one.
Two clients diverge from "accept any certificate" because their gRPC stacks lack
a per-channel skip-verify hook:
- Python uses trust-on-first-use: it fetches the server's presented certificate
over a separate unverified probe and pins it for the channel, and defaults the
SNI/target-name override to `localhost` (the generated certificate always
carries a `localhost` SAN).
- Rust is pin-only: tonic exposes no public hook to inject a custom certificate
verifier, so TLS over Rust requires either a pinned CA or an explicit opt-in to
system-trust verification; otherwise connecting returns a clear, actionable
error.
See [Gateway Configuration — Automatic self-signed certificate](./GatewayConfiguration.md#automatic-self-signed-certificate)
and the per-client READMEs for the as-built behavior.
## Later Revisit Items
These are explicit post-v1 revisit items, not open blockers:
+179
View File
@@ -229,6 +229,185 @@ behavior.
The alarm monitor is independent of client sessions: `AcknowledgeAlarm` and
`StreamAlarms` are session-less RPCs served by the monitor.
## Host Endpoints and Transport Security (Kestrel)
The listening endpoints are **not** part of the `MxGateway` section. The gateway
uses the stock ASP.NET Core host (`WebApplication.CreateBuilder`) with no
`ConfigureKestrel` call in code, so endpoints come entirely from the standard
`Kestrel` configuration section. On the deployed hosts these values are supplied
as NSSM environment variables (`Kestrel__Endpoints__...`), not from
`appsettings.json`.
Two named endpoints are bound:
| Endpoint name | Purpose | Protocol requirement |
|---|---|---|
| `Http` | Public gRPC API (sessions, invoke, events, Galaxy browse) | HTTP/2 |
| `Dashboard` | Blazor dashboard and SignalR hubs | HTTP/1.1 (HTTP/2 optional) |
Both endpoints share one routing pipeline; the names only select which TCP port
serves which traffic. The gRPC endpoint must negotiate **HTTP/2**, which drives
the protocol settings below.
### Plaintext (current deployments)
Both running hosts (`10.100.0.48` and `wonder-app-vd03`) serve the gRPC port in
**cleartext HTTP/2 (`h2c`)**. Because cleartext HTTP/2 has no ALPN to negotiate
the protocol, the gRPC endpoint must be pinned to `Http2` with prior knowledge:
```text
Kestrel__Endpoints__Http__Url=http://0.0.0.0:5120
Kestrel__Endpoints__Http__Protocols=Http2
Kestrel__Endpoints__Dashboard__Url=http://0.0.0.0:5130
```
In this mode all client↔gateway traffic — including the
`authorization: Bearer mxgw_...` API key and any `WriteSecured` / `AuthenticateUser`
payloads — crosses the network **unencrypted**. This is acceptable only on a
trusted/isolated network segment. Prefer TLS for anything else.
### TLS
To encrypt the gRPC channel, give the `Http` endpoint an `https://` URL and a
certificate. Over TLS, ALPN negotiates HTTP/2, so the explicit `Protocols=Http2`
pin is no longer required (the default `Http1AndHttp2` works for gRPC over TLS).
`appsettings.json` form:
```json
{
"Kestrel": {
"Endpoints": {
"Http": {
"Url": "https://0.0.0.0:5120",
"Certificate": {
"Path": "C:\\ProgramData\\MxGateway\\certs\\gateway.pfx",
"Password": "<pfx-password>"
}
},
"Dashboard": {
"Url": "https://0.0.0.0:5130",
"Certificate": {
"Path": "C:\\ProgramData\\MxGateway\\certs\\gateway.pfx",
"Password": "<pfx-password>"
}
}
}
}
}
```
Equivalent NSSM environment-variable form (how config is delivered on the hosts —
see [server deploy mechanics in the project notes]):
```text
Kestrel__Endpoints__Http__Url=https://0.0.0.0:5120
Kestrel__Endpoints__Http__Certificate__Path=C:\ProgramData\MxGateway\certs\gateway.pfx
Kestrel__Endpoints__Http__Certificate__Password=<pfx-password>
Kestrel__Endpoints__Dashboard__Url=https://0.0.0.0:5130
Kestrel__Endpoints__Dashboard__Certificate__Path=C:\ProgramData\MxGateway\certs\gateway.pfx
Kestrel__Endpoints__Dashboard__Certificate__Password=<pfx-password>
```
Certificate sourcing options (any standard ASP.NET Core form is accepted):
| Form | Keys |
|---|---|
| PFX file | `Certificate:Path` (+ `Certificate:Password` if encrypted) |
| PEM pair | `Certificate:Path` (cert) + `Certificate:KeyPath` (private key) |
| Windows cert store | `Certificate:Subject`, `Certificate:Store` (e.g. `My`), `Certificate:Location` (`LocalMachine`), `Certificate:AllowInvalid` |
The certificate's CN/SAN must cover the host name clients dial (or clients must
set a server-name override — see below). The dashboard endpoint can keep its own
certificate independent of the gRPC endpoint; pair this with
`MxGateway:Dashboard:RequireHttpsCookie` (`true`) for production HTTPS.
### Automatic self-signed certificate
`mxaccessgw` is an internal tool with no PKI to issue certificates, so requiring
an operator to supply one before TLS works pushed deployments toward plaintext.
To avoid that, the gateway fills in a self-signed certificate when an HTTPS
endpoint is configured without one.
**Trigger.** At startup the gateway inspects `Kestrel:Endpoints:*`. If any
endpoint has an `https://` URL and no `Certificate` subsection of its own, and no
`Kestrel:Certificates:Default` is set, the gateway generates (or loads) a
persisted self-signed certificate and wires it in as the HTTPS *default* via
`ConfigureHttpsDefaults`. All-plaintext deployments are untouched: when no HTTPS
endpoint is configured, no certificate or key material is generated or written.
**Generated certificate.** ECDSA P-256, `serverAuth` EKU, validity ≈
`ValidityYears` (default 10 years, with one day of clock-skew slack before
`notBefore`). SANs cover `localhost`, the machine name (and its FQDN when
resolvable), each entry in `AdditionalDnsNames`, and the loopback addresses
`127.0.0.1` and `::1`.
**`MxGateway:Tls:*` options.** All optional; the zero-config path needs none of
them.
| Option | Default | Purpose |
|---|---|---|
| `Tls:SelfSignedCertPath` | `C:\ProgramData\MxGateway\certs\gateway-selfsigned.pfx` | Where the generated certificate is persisted |
| `Tls:ValidityYears` | `10` | Lifetime of the generated certificate (validated 1100) |
| `Tls:AdditionalDnsNames` | `[]` | Extra DNS SANs (e.g. a load-balancer name) |
| `Tls:RegenerateIfExpired` | `true` | Replace an expired persisted certificate instead of failing |
`ValidityYears` is validated by `GatewayOptionsValidator` (range 1100); the
"HTTPS endpoint configured but no certificate available" fail-fast lives in the
bootstrap/provider, because the validator only sees the `MxGateway` section, not
`Kestrel:Endpoints`.
**Persistence.** The PFX is written with an **empty** export password — a random
in-memory password could not be reused across restarts, which the
persist-and-reuse model requires. The private key is instead protected at rest by
filesystem permissions: a restrictive ACL on Windows (SYSTEM + Administrators,
inherited ACEs stripped) on the `certs` directory and file, and mode `0600` on
non-Windows. The write is atomic (hardened temp file, then move). The persisted
certificate is reused across restarts (stable thumbprint, so CA-pinning clients
keep working) and regenerated only when it is missing, expired (and
`RegenerateIfExpired` is `true`), or unreadable/corrupt. If the directory is not
writable or the ACL cannot be applied, the gateway fails fast with a diagnostic
naming the path rather than falling back to an in-memory certificate.
**Logging.** On generate or load, the gateway logs the certificate thumbprint,
SAN list, and `notAfter` at Information. The PFX bytes, export password, and
private key are never logged.
**Operator override.** The generated certificate is only the HTTPS *default*. To
use a real certificate, configure one explicitly — either per endpoint via
`Kestrel:Endpoints:<name>:Certificate` (`Path`/`Subject`/`Thumbprint`, etc., as
in the table above) or globally via `Kestrel:Certificates:Default`. An
explicitly-configured certificate takes precedence, and the gateway then writes
no self-signed material.
### Client side
Each official client opts into TLS explicitly. For the .NET client
(`MxGatewayClientOptions`):
| Option | Effect |
|---|---|
| `UseTls` (default `false`) | Enables TLS. Requires an `https://` endpoint; an `https://` endpoint without `UseTls` fails validation, and vice versa. |
| `CaCertificatePath` | Pins a custom root (self-signed / private CA) using `CustomRootTrust` chain validation instead of the OS trust store; the .NET client also enforces the certificate hostname/SAN match on this path. |
| `RequireCertificateValidation` (default `false`) | Forces OS/system-trust verification on a TLS connection with no pinned CA. Leave `false` for the lenient default. |
| `ServerNameOverride` | SNI / certificate host name override when the dialed host differs from the certificate CN/SAN. |
To pair with the auto-generated self-signed certificate above, the clients are
**lenient by default**: a TLS connection with no pinned CA accepts whatever
certificate the gateway presents. Pin `CaCertificatePath` to verify, or set
`RequireCertificateValidation` to force system-trust verification without
pinning. The other language clients expose the equivalent options; the exact
behavior differs per stack — Python uses trust-on-first-use and Rust is pin-only.
See each client README for the as-built behavior.
### Gateway↔worker IPC
Transport security here applies only to the public gRPC channel. The
gateway↔worker link is a per-session **named pipe**
(`mxaccess-gateway-{gatewayPid}-{sessionId}`), not a network socket. It is not
TLS-encrypted and does not need to be: it never leaves the local Windows host and
is secured by the OS pipe ACL. See [Worker Frame Protocol](./WorkerFrameProtocol.md).
## Related Documentation
- [Gateway Process Detailed Design](./GatewayProcessDesign.md)
+18
View File
@@ -243,9 +243,27 @@ services.AddGrpc(options => options.Interceptors.Add<GatewayGrpcAuthorizationInt
Because the interceptor runs before any handler, `MxAccessGatewayService` can safely assume the call has been authorized and that `IGatewayRequestIdentityAccessor.Current` is populated. The handler's only responsibility is to read the identity for `OpenSession` so the session is owned by the authenticated principal; it does not perform any authorization checks of its own. See [Authorization](./Authorization.md) for the policy and identity model.
## Transport Security
The gRPC endpoint runs over HTTP/2, in cleartext (`h2c`) or TLS depending on the
Kestrel endpoint configuration. The current deployments serve it in cleartext, so
the API key and request payloads cross the network unencrypted. The endpoint,
protocol pinning, and TLS certificate configuration — plus the corresponding
client `UseTls` / `CaCertificatePath` options — are documented in
[Host Endpoints and Transport Security](./GatewayConfiguration.md#host-endpoints-and-transport-security-kestrel).
To make TLS usable without PKI, the gateway can auto-generate and persist a
self-signed certificate when an HTTPS endpoint is configured without one, and the
language clients are lenient by default — a TLS connection with no pinned CA
accepts the presented certificate (with per-stack nuances: Python is
trust-on-first-use, Rust is pin-only). See
[Automatic self-signed certificate](./GatewayConfiguration.md#automatic-self-signed-certificate)
and each client README for the as-built behavior.
## Related Documentation
- [Contracts](./Contracts.md)
- [Sessions](./Sessions.md)
- [Authorization](./Authorization.md)
- [Gateway Configuration](./GatewayConfiguration.md)
- [Gateway Process Design](./GatewayProcessDesign.md)
@@ -274,6 +274,11 @@ public sealed class GatewayOptionsValidator : IValidateOptions<GatewayOptions>
$"MxGateway:Tls:ValidityYears must be between {MinimumCertValidityYears} and {MaximumCertValidityYears}.");
}
// The default is non-blank, so this only catches an explicitly-blanked path.
AddIfBlank(
options.SelfSignedCertPath,
"MxGateway:Tls:SelfSignedCertPath must not be blank.",
failures);
AddIfInvalidPath(
options.SelfSignedCertPath,
"MxGateway:Tls:SelfSignedCertPath must be a valid filesystem path.",
@@ -1,4 +1,7 @@
using System.Security.Cryptography.X509Certificates;
using Microsoft.AspNetCore.Hosting.StaticWebAssets;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Configuration;
using ZB.MOM.WW.MxGateway.Contracts;
using ZB.MOM.WW.MxGateway.Server.Alarms;
using ZB.MOM.WW.MxGateway.Server.Configuration;
@@ -55,6 +58,8 @@ public static class GatewayApplication
});
StaticWebAssetsLoader.UseStaticWebAssets(builder.Environment, builder.Configuration);
ConfigureSelfSignedTls(builder);
builder.Services.AddGatewayConfiguration();
builder.Services.AddSqliteAuthStore();
builder.Services.AddGatewayGrpcAuthorization();
@@ -72,6 +77,34 @@ public static class GatewayApplication
return builder;
}
private static void ConfigureSelfSignedTls(WebApplicationBuilder builder)
{
if (!Security.Tls.KestrelTlsInspector.RequiresGeneratedCertificate(builder.Configuration))
{
return;
}
Configuration.TlsOptions tlsOptions =
builder.Configuration.GetSection("MxGateway:Tls").Get<Configuration.TlsOptions>()
?? new Configuration.TlsOptions();
using ILoggerFactory loggerFactory = LoggerFactory.Create(logging =>
{
logging.AddConfiguration(builder.Configuration.GetSection("Logging"));
logging.AddConsole();
});
Security.Tls.SelfSignedCertificateProvider provider = new(
tlsOptions,
loggerFactory.CreateLogger<Security.Tls.SelfSignedCertificateProvider>(),
TimeProvider.System);
X509Certificate2 certificate = provider.LoadOrCreate();
builder.WebHost.ConfigureKestrel(options =>
// The certificate is intentionally owned by Kestrel for the application
// lifetime; it is not disposed here.
options.ConfigureHttpsDefaults(https => https.ServerCertificate = certificate));
}
private static string ResolveContentRootPath()
{
string? configuredContentRootPath = Environment.GetEnvironmentVariable("ASPNETCORE_CONTENTROOT");
@@ -18,6 +18,13 @@ public static class KestrelTlsInspector
/// </summary>
public static bool RequiresGeneratedCertificate(IConfiguration configuration)
{
// A Kestrel default certificate applies to every endpoint that lacks its own.
// If the operator configured one, the gateway must not override it.
if (HasConfiguredCertificate(configuration.GetSection("Kestrel:Certificates:Default")))
{
return false;
}
IConfigurationSection endpoints = configuration.GetSection("Kestrel:Endpoints");
foreach (IConfigurationSection endpoint in endpoints.GetChildren())
{
@@ -28,13 +35,7 @@ public static class KestrelTlsInspector
continue;
}
IConfigurationSection certificate = endpoint.GetSection("Certificate");
bool hasOwnCertificate =
!string.IsNullOrWhiteSpace(certificate["Path"]) ||
!string.IsNullOrWhiteSpace(certificate["Subject"]) ||
!string.IsNullOrWhiteSpace(certificate["Thumbprint"]);
if (!hasOwnCertificate)
if (!HasConfiguredCertificate(endpoint.GetSection("Certificate")))
{
return true;
}
@@ -42,4 +43,9 @@ public static class KestrelTlsInspector
return false;
}
private static bool HasConfiguredCertificate(IConfigurationSection certificate)
=> !string.IsNullOrWhiteSpace(certificate["Path"]) ||
!string.IsNullOrWhiteSpace(certificate["Subject"]) ||
!string.IsNullOrWhiteSpace(certificate["Thumbprint"]);
}
@@ -1,4 +1,5 @@
using System.Net;
using System.Net.Sockets;
using System.Security.Cryptography;
using System.Security.Cryptography.X509Certificates;
using Microsoft.Extensions.Logging;
@@ -13,6 +14,7 @@ namespace ZB.MOM.WW.MxGateway.Server.Security.Tls;
public sealed class SelfSignedCertificateProvider
{
private const string ServerAuthOid = "1.3.6.1.5.5.7.3.1";
private const string SubjectAltNameOid = "2.5.29.17";
private readonly TlsOptions _options;
private readonly ILogger<SelfSignedCertificateProvider> _logger;
@@ -37,9 +39,10 @@ public sealed class SelfSignedCertificateProvider
key,
HashAlgorithmName.SHA256);
request.CertificateExtensions.Add(new X509BasicConstraintsExtension(false, false, 0, true));
// End-entity (non-CA) certificate: RFC 5280 marks BasicConstraints critical only for CA certs.
request.CertificateExtensions.Add(new X509BasicConstraintsExtension(false, false, 0, false));
request.CertificateExtensions.Add(new X509KeyUsageExtension(
X509KeyUsageFlags.DigitalSignature | X509KeyUsageFlags.KeyEncipherment,
X509KeyUsageFlags.DigitalSignature,
critical: true));
request.CertificateExtensions.Add(new X509EnhancedKeyUsageExtension(
[new Oid(ServerAuthOid, "Server Authentication")],
@@ -53,6 +56,21 @@ public sealed class SelfSignedCertificateProvider
san.AddDnsName(machine);
}
// Best-effort: add the machine FQDN when it differs from the short name and "localhost".
// GetHostEntry may fail if DNS is unavailable; skip silently in that case.
try
{
string fqdn = Dns.GetHostEntry(machine).HostName;
if (!string.IsNullOrWhiteSpace(fqdn)
&& !fqdn.Equals("localhost", StringComparison.OrdinalIgnoreCase)
&& !fqdn.Equals(machine, StringComparison.OrdinalIgnoreCase))
{
san.AddDnsName(fqdn);
}
}
catch (SocketException) { /* DNS not resolvable — FQDN SAN is optional */ }
catch (ArgumentException) { /* invalid host name — skip */ }
foreach (string extra in _options.AdditionalDnsNames)
{
if (!string.IsNullOrWhiteSpace(extra))
@@ -68,4 +86,154 @@ public sealed class SelfSignedCertificateProvider
DateTimeOffset now = _timeProvider.GetUtcNow();
return request.CreateSelfSigned(now.AddDays(-1), now.AddYears(_options.ValidityYears));
}
/// <summary>Loads the persisted certificate, regenerating when missing,
/// expired (and allowed), or unreadable.</summary>
public X509Certificate2 LoadOrCreate()
{
string path = _options.SelfSignedCertPath;
if (string.IsNullOrWhiteSpace(path))
{
throw new InvalidOperationException(
"MxGateway:Tls:SelfSignedCertPath must be set when an HTTPS endpoint has no certificate.");
}
if (File.Exists(path))
{
try
{
X509Certificate2 existing = X509CertificateLoader.LoadPkcs12FromFile(path, password: null, KeyStorageFlags());
if (existing.NotAfter.ToUniversalTime() > _timeProvider.GetUtcNow().UtcDateTime)
{
Log("Loaded", existing);
return existing;
}
if (!_options.RegenerateIfExpired)
{
string notAfter = existing.NotAfter.ToUniversalTime().ToString("u");
existing.Dispose();
throw new InvalidOperationException(
$"Persisted gateway certificate at '{path}' expired on {notAfter} " +
"and MxGateway:Tls:RegenerateIfExpired is false.");
}
_logger.LogWarning(
"Persisted gateway certificate at {Path} expired on {NotAfter:u}; regenerating.",
path, existing.NotAfter.ToUniversalTime());
existing.Dispose();
}
catch (CryptographicException ex)
{
_logger.LogWarning(ex,
"Persisted gateway certificate at {Path} is unreadable; regenerating.", path);
}
catch (Exception ex) when (ex is UnauthorizedAccessException or IOException)
{
// A permission/IO error must fail fast: do not regenerate (which would
// overwrite the operator's file), surface the cause instead.
throw new InvalidOperationException(
$"Persisted gateway certificate at '{path}' could not be read; the gateway " +
"process lacks read access or the file is otherwise inaccessible.", ex);
}
}
return GenerateAndPersist(path);
}
private X509Certificate2 GenerateAndPersist(string path)
{
using X509Certificate2 generated = GenerateCertificate();
byte[] pfx = generated.Export(X509ContentType.Pkcs12);
try
{
string? directory = Path.GetDirectoryName(path);
if (!string.IsNullOrEmpty(directory))
{
Directory.CreateDirectory(directory);
}
// The private-key bytes must never touch a world-readable file. Create the
// temp file empty, harden its permissions, and only then write the PFX into
// the already-protected file. The temp path is in the same directory as the
// target so the Move is atomic and preserves the hardened DACL/mode.
string temp = path + ".tmp";
using (File.Create(temp)) { }
HardenPermissions(temp);
// Writing into an existing file truncates content but preserves its ACL/mode.
// If the write or move fails the hardened temp file (which may contain private-key
// material) must not be left on disk; delete it best-effort before rethrowing.
try
{
File.WriteAllBytes(temp, pfx);
File.Move(temp, path, overwrite: true);
HardenPermissions(path);
}
catch (Exception)
{
try { File.Delete(temp); } catch { /* best effort */ }
throw;
}
X509Certificate2 loaded = X509CertificateLoader.LoadPkcs12FromFile(path, password: null, KeyStorageFlags());
Log("Generated", loaded);
return loaded;
}
finally
{
// pfx holds raw private-key material; clear it as soon as it is no longer needed.
Array.Clear(pfx, 0, pfx.Length);
}
}
private static X509KeyStorageFlags KeyStorageFlags()
=> OperatingSystem.IsWindows()
? X509KeyStorageFlags.MachineKeySet | X509KeyStorageFlags.PersistKeySet | X509KeyStorageFlags.Exportable
: X509KeyStorageFlags.Exportable;
private static void HardenPermissions(string path)
{
if (OperatingSystem.IsWindows())
{
HardenWindowsAcl(path);
}
else
{
File.SetUnixFileMode(path, UnixFileMode.UserRead | UnixFileMode.UserWrite);
}
}
[System.Runtime.Versioning.SupportedOSPlatform("windows")]
private static void HardenWindowsAcl(string path)
{
FileInfo file = new(path);
System.Security.AccessControl.FileSecurity security = new();
security.SetAccessRuleProtection(isProtected: true, preserveInheritance: false);
foreach (System.Security.Principal.WellKnownSidType sid in new[]
{
System.Security.Principal.WellKnownSidType.LocalSystemSid,
System.Security.Principal.WellKnownSidType.BuiltinAdministratorsSid,
})
{
System.Security.Principal.SecurityIdentifier identifier = new(sid, null);
security.AddAccessRule(new System.Security.AccessControl.FileSystemAccessRule(
identifier,
System.Security.AccessControl.FileSystemRights.FullControl,
System.Security.AccessControl.AccessControlType.Allow));
}
file.SetAccessControl(security);
}
private void Log(string action, X509Certificate2 cert)
{
string sans = cert.Extensions
.FirstOrDefault(e => e.Oid?.Value == SubjectAltNameOid)?
.Format(false) ?? "(none)";
_logger.LogInformation(
"{Action} gateway self-signed certificate: thumbprint={Thumbprint}, notAfter={NotAfter:u}, sans={Sans}",
action, cert.Thumbprint, cert.NotAfter.ToUniversalTime(), sans);
}
}
@@ -56,4 +56,13 @@ public sealed class GatewayOptionsValidatorTests
Assert.True(result.Failed);
Assert.Contains(result.Failures!, f => f.Contains("MxGateway:Tls:AdditionalDnsNames"));
}
[Fact]
public void Validate_Fails_WhenSelfSignedCertPathBlank()
{
GatewayOptions options = CloneWithTls(ValidOptions(), new TlsOptions { SelfSignedCertPath = " " });
ValidateOptionsResult result = new GatewayOptionsValidator().Validate(null, options);
Assert.True(result.Failed);
Assert.Contains(result.Failures!, f => f.Contains("MxGateway:Tls:SelfSignedCertPath must not be blank."));
}
}
@@ -0,0 +1,72 @@
using System.Net.Security;
using System.Net.Sockets;
using System.Security.Cryptography.X509Certificates;
using Microsoft.AspNetCore.Builder;
using Microsoft.AspNetCore.Hosting.Server;
using Microsoft.AspNetCore.Hosting.Server.Features;
using Microsoft.Extensions.DependencyInjection;
using ZB.MOM.WW.MxGateway.Server;
namespace ZB.MOM.WW.MxGateway.Tests.Gateway;
public sealed class GatewayTlsBootstrapTests
{
/// <summary>
/// Verifies that when a Kestrel HTTPS endpoint is configured without its own certificate,
/// the gateway supplies the generated self-signed certificate as the Kestrel HTTPS default.
/// The host must start and bind, and the certificate served on the TLS handshake must be the
/// gateway's generated cert (subject <c>CN=MxAccessGateway Self-Signed</c>) — not an ambient
/// ASP.NET Core development certificate. On a host with no dev cert installed, starting a
/// cert-less HTTPS endpoint throws "No server certificate was specified"; on a host that has a
/// trusted dev cert, Kestrel would otherwise serve that dev cert (<c>CN=localhost</c>), so the
/// subject assertion is what makes this test fail without the wiring on either kind of host.
/// </summary>
[Fact]
public async Task Host_ServesGeneratedCertificate_WhenHttpsEndpointHasNoCertificate()
{
string certDir = Directory.CreateTempSubdirectory().FullName;
try
{
Environment.SetEnvironmentVariable("Kestrel__Endpoints__Test__Url", "https://127.0.0.1:0");
Environment.SetEnvironmentVariable(
"MxGateway__Tls__SelfSignedCertPath", Path.Combine(certDir, "gw.pfx"));
WebApplication app = GatewayApplication.Build([]);
await app.StartAsync();
try
{
string servedSubject = await ReadServedCertificateSubjectAsync(app);
Assert.Contains("MxAccessGateway Self-Signed", servedSubject, StringComparison.Ordinal);
}
finally
{
await app.StopAsync();
await app.DisposeAsync();
}
}
finally
{
Environment.SetEnvironmentVariable("Kestrel__Endpoints__Test__Url", null);
Environment.SetEnvironmentVariable("MxGateway__Tls__SelfSignedCertPath", null);
Directory.Delete(certDir, recursive: true);
}
}
private static async Task<string> ReadServedCertificateSubjectAsync(WebApplication app)
{
IServerAddressesFeature addresses =
app.Services.GetRequiredService<IServer>().Features.Get<IServerAddressesFeature>()
?? throw new InvalidOperationException("Server addresses feature was not available.");
Uri endpoint = new(addresses.Addresses.First());
using TcpClient client = new();
await client.ConnectAsync(endpoint.Host, endpoint.Port);
using SslStream ssl = new(
client.GetStream(),
leaveInnerStreamOpen: false,
userCertificateValidationCallback: (_, _, _, _) => true);
await ssl.AuthenticateAsClientAsync("127.0.0.1");
return ssl.RemoteCertificate?.Subject ?? "(none)";
}
}
@@ -51,6 +51,13 @@ public sealed class KestrelTlsInspectorTests
=> Assert.True(KestrelTlsInspector.RequiresGeneratedCertificate(
Config(("Kestrel:Endpoints:Https:Url", "HTTPS://0.0.0.0:5120"))));
[Fact]
public void RequiresGeneratedCertificate_False_WhenKestrelDefaultCertificateConfigured()
=> Assert.False(KestrelTlsInspector.RequiresGeneratedCertificate(
Config(
("Kestrel:Endpoints:Https:Url", "https://0.0.0.0:5120"),
("Kestrel:Certificates:Default:Path", @"C:\certs\default.pfx"))));
[Fact]
public void RequiresGeneratedCertificate_True_WhenMixedEndpointsAndOneHttpsHasNoCert()
=> Assert.True(KestrelTlsInspector.RequiresGeneratedCertificate(
@@ -27,14 +27,130 @@ public sealed class SelfSignedCertificateProviderTests
string sans = ReadSubjectAltNames(cert);
Assert.Contains("localhost", sans);
Assert.Contains("gw.internal", sans);
Assert.Contains(Environment.MachineName, sans);
// Format() renders IP SANs as "IP Address:<addr>"; the IPv6 loopback may appear
// as "::1" or its expanded form depending on the platform crypto library.
Assert.Contains("127.0.0.1", sans);
Assert.True(sans.Contains("::1") || sans.Contains("0:0:0:0:0:0:0:1"),
$"Expected IPv6 loopback in SANs but got: {sans}");
X509EnhancedKeyUsageExtension eku = cert.Extensions.OfType<X509EnhancedKeyUsageExtension>().Single();
Assert.Contains(eku.EnhancedKeyUsages.Cast<System.Security.Cryptography.Oid>(),
o => o.Value == "1.3.6.1.5.5.7.3.1"); // serverAuth
}
[Fact]
public void LoadOrCreate_GeneratesPersistsAndReuses_SameThumbprint()
{
string dir = Directory.CreateTempSubdirectory().FullName;
try
{
string path = Path.Combine(dir, "gw.pfx");
FakeTimeProvider time = new(new DateTimeOffset(2026, 1, 1, 0, 0, 0, TimeSpan.Zero));
TlsOptions options = new() { SelfSignedCertPath = path };
using X509Certificate2 first = CreateProvider(options, time).LoadOrCreate();
Assert.True(File.Exists(path));
using X509Certificate2 second = CreateProvider(options, time).LoadOrCreate();
Assert.Equal(first.Thumbprint, second.Thumbprint); // reused, not regenerated
}
finally { Directory.Delete(dir, recursive: true); }
}
[Fact]
public void LoadOrCreate_Regenerates_WhenPersistedCertExpired()
{
string dir = Directory.CreateTempSubdirectory().FullName;
try
{
string path = Path.Combine(dir, "gw.pfx");
FakeTimeProvider time = new(new DateTimeOffset(2026, 1, 1, 0, 0, 0, TimeSpan.Zero));
TlsOptions options = new() { SelfSignedCertPath = path, ValidityYears = 1 };
using X509Certificate2 first = CreateProvider(options, time).LoadOrCreate();
time.Advance(TimeSpan.FromDays(800)); // past 1-year validity
using X509Certificate2 second = CreateProvider(options, time).LoadOrCreate();
Assert.NotEqual(first.Thumbprint, second.Thumbprint);
}
finally { Directory.Delete(dir, recursive: true); }
}
[Fact]
public void LoadOrCreate_Regenerates_WhenPersistedFileCorrupt()
{
string dir = Directory.CreateTempSubdirectory().FullName;
try
{
string path = Path.Combine(dir, "gw.pfx");
File.WriteAllText(path, "not a pfx");
TlsOptions options = new() { SelfSignedCertPath = path };
using X509Certificate2 cert = CreateProvider(options, new FakeTimeProvider()).LoadOrCreate();
Assert.True(cert.HasPrivateKey);
}
finally { Directory.Delete(dir, recursive: true); }
}
[Fact]
public void LoadOrCreate_Throws_WhenExpiredAndRegenerateDisabled()
{
string dir = Directory.CreateTempSubdirectory().FullName;
try
{
string path = Path.Combine(dir, "gw.pfx");
FakeTimeProvider time = new(new DateTimeOffset(2026, 1, 1, 0, 0, 0, TimeSpan.Zero));
TlsOptions options = new() { SelfSignedCertPath = path, ValidityYears = 1, RegenerateIfExpired = false };
using (CreateProvider(options, time).LoadOrCreate()) { }
time.Advance(TimeSpan.FromDays(800));
Assert.Throws<InvalidOperationException>(() => CreateProvider(options, time).LoadOrCreate());
}
finally { Directory.Delete(dir, recursive: true); }
}
[Fact]
public void LoadOrCreate_Throws_WhenSelfSignedCertPathBlank()
{
TlsOptions options = new() { SelfSignedCertPath = " " };
Assert.Throws<InvalidOperationException>(
() => CreateProvider(options, new FakeTimeProvider()).LoadOrCreate());
}
/// <summary>
/// Verifies that GenerateAndPersist cleans up the hardened .tmp file when persist fails.
/// The failure is induced by setting SelfSignedCertPath to a path whose parent directory
/// is an existing regular file, causing Directory.CreateDirectory (or the subsequent write)
/// to throw an IOException/UnauthorizedAccessException.
/// </summary>
[Fact]
public void LoadOrCreate_DeletesTempFile_WhenPersistFails()
{
string outerDir = Directory.CreateTempSubdirectory().FullName;
try
{
// Create a regular file at what would be the parent directory of the cert path.
// Any attempt to create that "directory" or write files into it must fail.
string fileActingAsDir = Path.Combine(outerDir, "notadir");
File.WriteAllText(fileActingAsDir, "block");
// Point the cert path inside the regular file — Directory.CreateDirectory will
// throw because the parent path component is a file, not a directory.
string certPath = Path.Combine(fileActingAsDir, "gw.pfx");
string expectedTemp = certPath + ".tmp";
TlsOptions options = new() { SelfSignedCertPath = certPath };
Assert.ThrowsAny<Exception>(() => CreateProvider(options, new FakeTimeProvider()).LoadOrCreate());
// The .tmp file must not be left behind.
Assert.False(File.Exists(expectedTemp), $"Leaked temp file: {expectedTemp}");
}
finally { Directory.Delete(outerDir, recursive: true); }
}
private const string SubjectAltNameOid = "2.5.29.17";
private static string ReadSubjectAltNames(X509Certificate2 cert)
=> cert.Extensions
.First(e => e.Oid?.Value == "2.5.29.17")
.First(e => e.Oid?.Value == SubjectAltNameOid)
.Format(false);
}