python + typescript + bash: mirror the durability fixes from go/

Parity pass on the other three language templates. Same guarantees as
go/: survive server restart, client restart, half-open TCP, and long
outages; rejoin and drain prime-side backlog on reconnect, without
the user writing any of this in process.*.

python/main.py:
- grpc.keepalive_time_ms=10000, keepalive_timeout_ms=3000,
  keepalive_permit_without_calls=1 on the channel. Half-open TCP is
  detected within ~13s instead of the OS default ~2h.
- Exponential backoff with jitter; max_backoff_seconds config ceiling
  (default 120). Attempts counter resets after a session runs
  healthy for 60s so transient restarts don't escalate the delay.
- chain_id added as a required config field and sent as the
  x-chain-id gRPC metadata header (prime rejects streams without it).

typescript/src/main.ts:
- Same keepalive options on the @grpc/grpc-js client.
- Same exponential backoff + jitter logic.
- chain_id added to Config + metadata.

bash/:
- Config + README updated. The bash template uses Python's main.py
  as its runtime, so the behavioural changes above flow through
  without a separate main per language.

Docs: each README gains a "Durability guarantees" section so contract
authors see the invariants without reading the runtime code.
This commit is contained in:
2026-04-19 21:32:24 -04:00
parent 2bc57c073d
commit f22fb29964
8 changed files with 216 additions and 35 deletions

View File

@@ -33,16 +33,29 @@ const SmartContractService = protoDescriptor.remote_sc.SmartContractService;
// Configuration and Client Infrastructure
// Do not modify this file unless you need to customize the client behavior.
// Implement your smart contract logic in process.ts instead.
//
// Durability contract (provided by this file, no work for the user):
// - If the Dragonchain Prime server restarts, updates, or momentarily
// drops the network, this client auto-reconnects. Transactions
// observed during the outage are queued by prime and delivered once
// the stream is re-established.
// - If this client restarts (crash, deploy, long sleep), it rejoins
// the stream and prime re-delivers every still-pending transaction
// that should have invoked it, oldest first.
// - Half-open TCP (a silent peer that never sent FIN) is detected
// within ~13 s via gRPC keepalive pings. No dangling ghost streams.
// =============================================================================
// Normalized runtime configuration, produced from RawConfig by loadConfig().
// Field names are camelCase here; the YAML file uses snake_case.
interface Config {
// Host:port of the Dragonchain Prime gRPC endpoint.
serverAddress: string;
// Sent as the x-chain-id metadata header; prime rejects streams without it.
chainId: string;
// Identifies this contract to prime (x-smart-contract-id header).
smartContractId: string;
// Auth credential, sent as the x-api-key metadata header.
apiKey: string;
// When true, the channel uses TLS credentials instead of insecure ones.
useTls: boolean;
// Optional path to a CA cert for TLS; only consulted when useTls is true.
tlsCertPath?: string;
// Number of concurrent transaction workers.
numWorkers: number;
// Base reconnect delay; also the first rung of the exponential backoff.
reconnectDelaySeconds: number;
// Ceiling for the exponential backoff delay (jitter may add up to one
// extra base interval on top of this cap).
maxBackoffSeconds: number;
// 0 means retry forever; otherwise give up after this many attempts.
maxReconnectAttempts: number;
}
@@ -91,9 +104,23 @@ class SmartContractClient {
credentials = grpc.credentials.createInsecure();
}
// Keepalive is the load-bearing piece for detecting a half-open
// connection. Without it, a silent peer (prime restarted without
// sending FIN; laptop resumed from sleep; corporate NAT dropped
// the flow) leaves us in a "connected" state until the OS-level
// TCP keepalive fires — on Linux ~2 hours by default. 10 s ping
// + 3 s timeout catches all of that within ~13 s.
const channelOptions = {
"grpc.keepalive_time_ms": 10000,
"grpc.keepalive_timeout_ms": 3000,
"grpc.keepalive_permit_without_calls": 1,
"grpc.http2.max_pings_without_data": 0,
};
this.client = new SmartContractService(
this.config.serverAddress,
credentials
credentials,
channelOptions
);
console.log(`[SC-Client] Connected to server at ${this.config.serverAddress}`);
@@ -175,10 +202,13 @@ class SmartContractClient {
this.running = true;
// Create metadata for authentication
// Create metadata for authentication + routing. x-chain-id is
// required by prime; missing it yields "missing chain ID" and the
// stream never receives transactions.
const metadata = new grpc.Metadata();
metadata.add("x-api-key", this.config.apiKey);
metadata.add("x-smart-contract-id", this.config.smartContractId);
metadata.add("x-chain-id", this.config.chainId);
return new Promise((resolve) => {
// Establish bi-directional stream
@@ -255,12 +285,14 @@ class SmartContractClient {
// Shape of the YAML config file as parsed by js-yaml, before validation
// and defaulting in loadConfig(). snake_case mirrors the on-disk keys.
// Required keys (server_address, chain_id, smart_contract_id, api_key)
// are enforced at load time; the optional ones receive defaults.
interface RawConfig {
server_address: string;
chain_id: string;
smart_contract_id: string;
api_key: string;
use_tls?: boolean;
tls_cert_path?: string;
num_workers?: number;
reconnect_delay_seconds?: number;
max_backoff_seconds?: number;
max_reconnect_attempts?: number;
}
@@ -269,7 +301,7 @@ function loadConfig(configPath: string): Config {
const raw = yaml.load(content) as RawConfig;
// Validate required fields
const required = ["server_address", "smart_contract_id", "api_key"];
const required = ["server_address", "chain_id", "smart_contract_id", "api_key"];
for (const field of required) {
if (!(field in raw) || !raw[field as keyof RawConfig]) {
throw new Error(`Missing required config field: ${field}`);
@@ -278,16 +310,33 @@ function loadConfig(configPath: string): Config {
return {
serverAddress: raw.server_address,
chainId: raw.chain_id,
smartContractId: raw.smart_contract_id,
apiKey: raw.api_key,
useTls: raw.use_tls ?? false,
tlsCertPath: raw.tls_cert_path,
numWorkers: raw.num_workers ?? 10,
reconnectDelaySeconds: raw.reconnect_delay_seconds ?? 5,
reconnectDelaySeconds: raw.reconnect_delay_seconds ?? 3,
maxBackoffSeconds: raw.max_backoff_seconds ?? 120,
maxReconnectAttempts: raw.max_reconnect_attempts ?? 0,
};
}
/**
 * Next reconnect delay in milliseconds: exponential backoff with jitter.
 * The deterministic part is base * 2^attempts — exponent clamped at 10,
 * result capped at maxBackoffSeconds — and a random jitter in [0, base)
 * is then added on top, so a fleet of clients does not reconnect in
 * lockstep after a server restart.
 */
function nextBackoffMs(config: Config, attempts: number): number {
const base = Math.max(1, config.reconnectDelaySeconds);
const ceiling = Math.max(base, config.maxBackoffSeconds);
const exponent = Math.min(10, attempts); // clamp exponent
const backoff = Math.min(ceiling, base * Math.pow(2, exponent));
const jitter = Math.random() * base;
return Math.round(1000 * (backoff + jitter));
}
// =============================================================================
// Main Entry Point
// =============================================================================
@@ -325,13 +374,19 @@ async function main(): Promise<void> {
process.on("SIGINT", shutdown);
process.on("SIGTERM", shutdown);
// Connection loop with reconnection logic
// Connection loop with reconnection logic. A session that runs
// healthy for 60+ seconds resets the attempts counter so the next
// failure starts the exponential backoff schedule fresh.
let attempts = 0;
const HEALTHY_RUN_MS = 60 * 1000;
while (true) {
if (client.connect()) {
attempts = 0;
const start = Date.now();
const success = await client.run();
if (Date.now() - start > HEALTHY_RUN_MS) {
attempts = 0;
}
if (!success) {
// Check if it was a graceful shutdown
client.close();
@@ -352,12 +407,12 @@ async function main(): Promise<void> {
break;
}
const delay = config.reconnectDelaySeconds;
const delayMs = nextBackoffMs(config, attempts - 1);
console.log(
`[SC-Client] Reconnecting in ${delay} seconds (attempt ${attempts})...`
`[SC-Client] Reconnecting in ${(delayMs / 1000).toFixed(1)} seconds (attempt ${attempts})...`
);
await new Promise((resolve) => setTimeout(resolve, delay * 1000));
await new Promise((resolve) => setTimeout(resolve, delayMs));
}
console.log("[SC-Client] Client shut down");