Files
sc-templates/python/main.py
Andrew Miller f22fb29964 python + typescript + bash: mirror the durability fixes from go/
Parity pass on the other three language templates. Same guarantees as
go/: survive server restart, client restart, half-open TCP, and long
outages; rejoin and drain prime-side backlog on reconnect, without
the user writing any of this in process.*.

python/main.py:
- grpc.keepalive_time_ms=10000, keepalive_timeout_ms=3000,
  keepalive_permit_without_calls=1 on the channel. Half-open TCP is
  detected within ~13s instead of the OS default ~2h.
- Exponential backoff with jitter; max_backoff_seconds config ceiling
  (default 120). Attempts counter resets after a session runs
  healthy for 60s so transient restarts don't escalate the delay.
- chain_id added as a required config field and sent as the
  x-chain-id gRPC metadata header (prime rejects streams without it).

typescript/src/main.ts:
- Same keepalive options on the @grpc/grpc-js client.
- Same exponential backoff + jitter logic.
- chain_id added to Config + metadata.

bash/:
- Config + README updated. The bash template uses Python's main.py
  as its runtime, so the behavioural changes above flow through
  without a separate main per language.

Docs: each README gains a "Durability guarantees" section so contract
authors see the invariants without reading the runtime code.
2026-04-19 21:32:24 -04:00

355 lines
12 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Dragonchain Smart Contract Client
A gRPC client that connects to Dragonchain Prime server to process
smart contract transactions.
Do not modify this file unless you need to customize the client behavior.
Implement your smart contract logic in process.py instead.
"""
import argparse
import json
import logging
import queue
import random
import signal
import sys
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from typing import Any, Optional
import grpc
import yaml
import remote_sc_pb2 as pb
import remote_sc_pb2_grpc as pb_grpc
from process import ProcessResult, process
# Root logging setup: timestamped, level-tagged records for every module.
_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
_LOG_DATEFMT = "%Y-%m-%d %H:%M:%S"
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, datefmt=_LOG_DATEFMT)

# Module-wide logger used by all client machinery below.
logger = logging.getLogger("SmartContract")
# =============================================================================
# Configuration and Client Infrastructure
# Do not modify this file unless you need to customize the client behavior.
# Implement your smart contract logic in process.py instead.
#
# Durability contract (provided by this file, no work for the user):
# - If the Dragonchain Prime server restarts, updates, or momentarily
# drops the network, this client auto-reconnects. Transactions
# observed during the outage are queued by prime and delivered once
# the stream is re-established.
# - If this client restarts (crash, deploy, long sleep), it rejoins
# the stream and prime re-delivers every still-pending transaction
# that should have invoked it, oldest first.
# - Half-open TCP (a silent peer that never sent FIN) is detected
# within ~13 s via gRPC keepalive pings. No dangling ghost streams.
# =============================================================================
@dataclass
class Config:
    """Client configuration loaded from YAML.

    Holds connection, authentication, and reconnection-policy settings;
    populated and validated by load_config().
    """

    server_address: str  # host:port of the Dragonchain Prime gRPC server
    chain_id: str  # sent as the x-chain-id metadata header (required by prime)
    smart_contract_id: str  # sent as the x-smart-contract-id metadata header
    api_key: str  # sent as the x-api-key metadata header
    use_tls: bool = False  # when True, tls_cert_path must also be set
    tls_cert_path: Optional[str] = None  # path to the PEM root certificate for TLS
    num_workers: int = 10  # size of the request-processing thread pool
    reconnect_delay_seconds: int = 3  # base delay for exponential backoff
    max_backoff_seconds: int = 120  # pre-jitter ceiling for the backoff delay
    max_reconnect_attempts: int = 0 # 0 = infinite
class SmartContractClient:
    """gRPC client for smart contract execution.

    A single bi-directional Run() stream to prime carries all traffic:
    inbound requests are fanned out to a worker thread pool via
    work_queue, and outbound responses are drained from response_queue
    by the request generator handed to the stream.
    """

    def __init__(self, config: Config):
        self.config = config
        self.channel: Optional[grpc.Channel] = None
        self.stub: Optional[pb_grpc.SmartContractServiceStub] = None
        self.running = False
        self.work_queue: queue.Queue = queue.Queue()  # inbound requests awaiting a worker
        self.response_queue: queue.Queue = queue.Queue()  # outbound responses awaiting the stream
        self.executor: Optional[ThreadPoolExecutor] = None
        # The live bi-directional stream call, published by run() so that
        # stop() can cancel an iteration blocked waiting for a message.
        self._stream = None

    def connect(self) -> bool:
        """Establish connection to the gRPC server.

        Returns True on success, False on any failure (missing TLS cert
        path, unreadable cert file, channel construction error).
        """
        try:
            # Keepalive is the load-bearing piece for detecting a
            # half-open connection. Without it, a silent peer (prime
            # restarted without sending FIN; laptop resumed from sleep;
            # corporate NAT dropped the flow) leaves us in a "connected"
            # state until the OS-level TCP keepalive fires — on Linux
            # that's ~2 hours by default. 10 s ping + 3 s timeout
            # catches all of that within ~13 s.
            channel_options = [
                ("grpc.keepalive_time_ms", 10000),
                ("grpc.keepalive_timeout_ms", 3000),
                ("grpc.keepalive_permit_without_calls", 1),
                ("grpc.http2.max_pings_without_data", 0),
            ]
            if self.config.use_tls:
                if not self.config.tls_cert_path:
                    logger.error("TLS enabled but no certificate path provided")
                    return False
                with open(self.config.tls_cert_path, "rb") as f:
                    creds = grpc.ssl_channel_credentials(f.read())
                self.channel = grpc.secure_channel(
                    self.config.server_address, creds, options=channel_options
                )
            else:
                self.channel = grpc.insecure_channel(
                    self.config.server_address, options=channel_options
                )
            self.stub = pb_grpc.SmartContractServiceStub(self.channel)
            logger.info(f"Connected to server at {self.config.server_address}")
            return True
        except Exception as e:
            logger.error(f"Failed to connect: {e}")
            return False

    def close(self):
        """Close the gRPC connection."""
        if self.channel:
            self.channel.close()
            self.channel = None
            self.stub = None

    def _response_generator(self):
        """Yield responses from response_queue to the outbound stream.

        Consumed by gRPC as the request iterator of the Run() call;
        returning half-closes our side of the stream. Ends on a None
        sentinel (pushed by run()'s cleanup) or when running goes False.
        """
        while self.running:
            try:
                response = self.response_queue.get(timeout=1.0)
                if response is None:
                    break
                yield response
            except queue.Empty:
                continue

    def _process_request(self, request: pb.SmartContractRequest):
        """Run process() on one request and queue the response for sending."""
        logs = ""  # reserved field; log capture is not implemented here
        try:
            result = process(
                tx_json=request.transaction_json,
                env_vars=dict(request.env_vars),
                secrets=dict(request.secrets),
            )
            response = pb.SmartContractResponse(
                transaction_id=request.transaction_id,
                output_to_chain=result.output_to_chain,
                logs=logs,
            )
            if result.error:
                response.error = result.error
                logger.error(
                    f"Error processing transaction {request.transaction_id}: {result.error}"
                )
            else:
                response.result_json = json.dumps(result.data) if result.data else "{}"
                logger.info(f"Successfully processed transaction {request.transaction_id}")
        except Exception as e:
            # process() raised: report the failure back to prime rather
            # than dropping the transaction silently.
            response = pb.SmartContractResponse(
                transaction_id=request.transaction_id,
                error=str(e),
                logs=logs,
            )
            logger.exception(f"Exception processing transaction {request.transaction_id}")
        self.response_queue.put(response)

    def _worker(self):
        """Worker thread: drain work_queue until a None sentinel or stop."""
        while self.running:
            try:
                request = self.work_queue.get(timeout=1.0)
                if request is None:
                    break
                self._process_request(request)
            except queue.Empty:
                continue

    def run(self) -> bool:
        """Run the client and process incoming requests.

        Blocks until the stream ends. Returns True when the server
        closed the stream cleanly, False on any error (including
        cancellation triggered by stop()).
        """
        if not self.stub:
            logger.error("Not connected to server")
            return False
        self.running = True
        self.executor = ThreadPoolExecutor(max_workers=self.config.num_workers)
        # Start worker threads
        for _ in range(self.config.num_workers):
            self.executor.submit(self._worker)
        logger.info(f"Started {self.config.num_workers} worker threads")
        # Create metadata for authentication + routing. x-chain-id is
        # required by prime; missing it yields "missing chain ID" and
        # the stream never receives transactions.
        metadata = [
            ("x-api-key", self.config.api_key),
            ("x-smart-contract-id", self.config.smart_contract_id),
            ("x-chain-id", self.config.chain_id),
        ]
        try:
            # Establish bi-directional stream
            stream = self.stub.Run(self._response_generator(), metadata=metadata)
            # Publish the call so stop() can cancel a blocked iteration.
            self._stream = stream
            logger.info("Stream established, waiting for requests...")
            # Receive and dispatch requests
            for request in stream:
                if not self.running:
                    break
                logger.info(f"Received request: transaction_id={request.transaction_id}")
                self.work_queue.put(request)
            logger.info("Server closed the stream")
            return True
        except grpc.RpcError as e:
            logger.error(f"gRPC error: {e.code()} - {e.details()}")
            return False
        except Exception as e:
            logger.exception(f"Error in run loop: {e}")
            return False
        finally:
            self._stream = None
            self.running = False
            # Signal workers to stop
            for _ in range(self.config.num_workers):
                self.work_queue.put(None)
            self.response_queue.put(None)
            # Wait for workers to finish
            if self.executor:
                self.executor.shutdown(wait=True)

    def stop(self):
        """Stop the client gracefully.

        Cancels the in-flight stream (if any) so a run() blocked in
        stream iteration wakes immediately; without the cancel, shutdown
        would wait for the next inbound message or a keepalive failure.
        """
        logger.info("Stopping client...")
        self.running = False
        stream = self._stream
        if stream is not None:
            stream.cancel()
def load_config(path: str) -> Config:
    """Load and validate client configuration from a YAML file.

    Args:
        path: Filesystem path to the YAML config file.

    Returns:
        A populated Config.

    Raises:
        ValueError: if the document is not a mapping, or a required
            field is missing or empty.
        OSError: if the file cannot be read.
    """
    with open(path, "r") as f:
        data = yaml.safe_load(f)
    # An empty file parses to None; treat it as an empty mapping so the
    # check below reports which fields are missing instead of raising a
    # confusing TypeError on `field not in data`.
    if data is None:
        data = {}
    if not isinstance(data, dict):
        raise ValueError(f"Config file must be a YAML mapping, got {type(data).__name__}")
    # Validate required fields (empty/falsy values count as missing)
    required = ["server_address", "chain_id", "smart_contract_id", "api_key"]
    for field in required:
        if field not in data or not data[field]:
            raise ValueError(f"Missing required config field: {field}")
    return Config(
        server_address=data["server_address"],
        chain_id=data["chain_id"],
        smart_contract_id=data["smart_contract_id"],
        api_key=data["api_key"],
        use_tls=data.get("use_tls", False),
        tls_cert_path=data.get("tls_cert_path"),
        num_workers=data.get("num_workers", 10),
        reconnect_delay_seconds=data.get("reconnect_delay_seconds", 3),
        max_backoff_seconds=data.get("max_backoff_seconds", 120),
        max_reconnect_attempts=data.get("max_reconnect_attempts", 0),
    )
def next_backoff(config: Config, attempts: int) -> float:
"""Compute the next reconnect delay in seconds using exponential
backoff with jitter. base * 2^attempts, capped at max_backoff_seconds,
plus random(0, base) jitter so many clients don't reconnect in
lockstep after a server restart."""
base = max(config.reconnect_delay_seconds, 1)
cap = max(config.max_backoff_seconds, base)
shift = min(attempts, 10) # clamp exponent
delay = min(cap, base * (2 ** shift))
jitter = random.uniform(0, base)
return delay + jitter
def main():
    """Entry point: load config, then run the client with auto-reconnect.

    Reconnection uses exponential backoff with jitter (next_backoff); a
    session that stays healthy for 60+ seconds resets the attempt
    counter so transient restarts don't escalate the delay.
    """
    parser = argparse.ArgumentParser(description="Dragonchain Smart Contract Client")
    parser.add_argument(
        "--config",
        "-c",
        default="config.yaml",
        help="Path to configuration file",
    )
    args = parser.parse_args()

    # Load configuration
    try:
        config = load_config(args.config)
    except Exception as e:
        logger.error(f"Failed to load config: {e}")
        sys.exit(1)

    # Create client
    client = SmartContractClient(config)

    # Graceful-shutdown tracking. BUG FIX: run() always clears
    # client.running in its finally block, so the old check
    # `if not ran_ok and not client.running` was true on *every* stream
    # error and broke out of the loop — reconnection never happened on
    # failure. A dedicated Event set only by the signal handler
    # distinguishes "operator asked us to stop" from "stream failed".
    shutdown_requested = threading.Event()

    def signal_handler(signum, frame):
        logger.info(f"Received signal {signum}, shutting down...")
        shutdown_requested.set()
        client.stop()

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    # Connection loop with reconnection logic. A session that runs
    # healthy for 60+ seconds resets the attempts counter so the next
    # failure starts the exponential backoff schedule fresh.
    attempts = 0
    HEALTHY_RUN_SECONDS = 60
    while True:
        if client.connect():
            start = time.monotonic()
            client.run()
            if time.monotonic() - start > HEALTHY_RUN_SECONDS:
                attempts = 0
            client.close()
        if shutdown_requested.is_set():
            logger.info("Shutdown requested")
            break
        attempts += 1
        if config.max_reconnect_attempts > 0 and attempts >= config.max_reconnect_attempts:
            logger.error(f"Max reconnection attempts ({config.max_reconnect_attempts}) reached")
            break
        delay = next_backoff(config, attempts - 1)
        logger.info(f"Reconnecting in {delay:.1f} seconds (attempt {attempts})...")
        time.sleep(delay)
        # The signal may also arrive while we were sleeping.
        if shutdown_requested.is_set():
            logger.info("Shutdown requested")
            break
    logger.info("Client shut down")


if __name__ == "__main__":
    main()