package main

import (
	"context"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"log"
	"math/rand"
	"os"
	"os/signal"
	"sync"
	"syscall"
	"time"

	pb "github.com/your-org/smart-contract/proto"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials"
	"google.golang.org/grpc/credentials/insecure"
	"google.golang.org/grpc/keepalive"
	"google.golang.org/grpc/metadata"
	"gopkg.in/yaml.v3"
)

// =============================================================================
// Configuration and Client Infrastructure
// Do not modify this file unless you need to customize the client behavior.
// Implement your smart contract logic in process.go instead.
//
// Durability contract (provided by this file, no work for the user):
//   - If the Dragonchain Prime server restarts, updates, or momentarily
//     drops the network, this client auto-reconnects. Transactions
//     observed during the outage are queued by prime and delivered once
//     the stream is re-established.
//   - If this client restarts (crash, deploy, long sleep), it rejoins
//     the stream and prime re-delivers every still-pending transaction
//     that should have invoked it, oldest first.
//   - Half-open TCP (a silent peer that never sent FIN) is detected
//     within ~13 s via gRPC keepalive pings. No dangling ghost streams.
// =============================================================================

// Config holds the client configuration loaded from YAML.
type Config struct {
	ServerAddress   string `yaml:"server_address"`
	ChainID         string `yaml:"chain_id"`
	SmartContractID string `yaml:"smart_contract_id"`
	APIKey          string `yaml:"api_key"`
	UseTLS          bool   `yaml:"use_tls"`
	TLSCertPath     string `yaml:"tls_cert_path"`
	NumWorkers      int    `yaml:"num_workers"`

	// ReconnectDelaySecs is the BASE backoff between reconnect attempts.
	// The effective delay is `base * 2^attempts + jitter` capped at
	// MaxBackoffSeconds — so repeated failures back off, but a clean
	// server restart is picked up within a few seconds.
	ReconnectDelaySecs int `yaml:"reconnect_delay_seconds"`

	// MaxBackoffSeconds caps the exponential backoff. Default 120.
	MaxBackoffSeconds int `yaml:"max_backoff_seconds"`

	// MaxReconnectAttempts: 0 = infinite (default and recommended — the
	// whole point of this client is to stay available indefinitely).
	MaxReconnectAttempts int `yaml:"max_reconnect_attempts"`
}

// Client manages the gRPC connection and request processing.
type Client struct {
	config     *Config
	conn       *grpc.ClientConn
	grpcClient pb.SmartContractServiceClient
	workChan   chan *pb.SmartContractRequest
	wg         sync.WaitGroup
	logger     *log.Logger
}

// NewClient creates a new smart contract client.
func NewClient(config *Config) *Client {
	return &Client{
		config:   config,
		workChan: make(chan *pb.SmartContractRequest, config.NumWorkers*2),
		logger:   log.New(os.Stdout, "[SC-Client] ", log.LstdFlags|log.Lmicroseconds),
	}
}

// Connect establishes a connection to the gRPC server.
func (c *Client) Connect() error {
	var opts []grpc.DialOption
	if c.config.UseTLS {
		creds, err := credentials.NewClientTLSFromFile(c.config.TLSCertPath, "")
		if err != nil {
			return fmt.Errorf("failed to load TLS credentials: %w", err)
		}
		opts = append(opts, grpc.WithTransportCredentials(creds))
	} else {
		opts = append(opts, grpc.WithTransportCredentials(insecure.NewCredentials()))
	}

	// Keepalive is the load-bearing piece for detecting a half-open
	// connection. Without it, a silent peer (prime restarted without
	// sending FIN; laptop resumed from sleep; corporate NAT dropped the
	// flow) leaves us in a "connected" state until the OS-level TCP
	// keepalive eventually fires — which on Linux defaults to ~2 hours.
	// 10 s ping + 3 s timeout catches all of that within ~13 s.
	opts = append(opts, grpc.WithKeepaliveParams(keepalive.ClientParameters{
		Time:                10 * time.Second,
		Timeout:             3 * time.Second,
		PermitWithoutStream: true,
	}))

	// NOTE: grpc.NewClient is lazy — it validates the target and options
	// but does not dial until the first RPC, so a failure here means a
	// bad address/credentials rather than an unreachable server.
	conn, err := grpc.NewClient(c.config.ServerAddress, opts...)
	if err != nil {
		return fmt.Errorf("failed to connect to server: %w", err)
	}

	c.conn = conn
	c.grpcClient = pb.NewSmartContractServiceClient(conn)
	c.logger.Printf("Connected to server at %s", c.config.ServerAddress)
	return nil
}

// Close closes the gRPC connection.
func (c *Client) Close() error {
	if c.conn != nil {
		return c.conn.Close()
	}
	return nil
}

// Run starts the client and processes incoming requests. It returns
// when the stream terminates for any reason (server close, network
// error, ctx cancellation). The outer reconnect loop in main() calls
// Run again after a backoff.
func (c *Client) Run(ctx context.Context) error {
	// Wrap ctx with our own cancel so the sender goroutine can tear down
	// the stream on Send errors — otherwise stream.Recv() in the main
	// loop could block forever waiting for a peer that is never coming
	// back. Any cancel from here propagates to both directions of the
	// bidi stream.
	streamCtx, streamCancel := context.WithCancel(ctx)
	defer streamCancel()

	// Auth + routing metadata. x-chain-id is required by the server; a
	// missing header yields "missing chain ID" from prime and no
	// transactions will arrive.
	md := metadata.Pairs(
		"x-api-key", c.config.APIKey,
		"x-smart-contract-id", c.config.SmartContractID,
		"x-chain-id", c.config.ChainID,
	)
	streamCtx = metadata.NewOutgoingContext(streamCtx, md)

	// Establish the bi-directional stream.
	stream, err := c.grpcClient.Run(streamCtx)
	if err != nil {
		return fmt.Errorf("failed to establish stream: %w", err)
	}

	c.logger.Printf("Stream established, starting %d workers", c.config.NumWorkers)

	// Channel to collect responses from workers.
	responseChan := make(chan *pb.SmartContractResponse, c.config.NumWorkers*2)
	errChan := make(chan error, 2)

	// Start worker goroutines.
	for i := 0; i < c.config.NumWorkers; i++ {
		c.wg.Add(1)
		go c.worker(streamCtx, responseChan)
	}

	// Sender: forwards worker responses back to the server. Any Send
	// error immediately cancels streamCtx so the Recv loop below exits
	// instead of blocking forever.
	go func() {
		for resp := range responseChan {
			if err := stream.Send(resp); err != nil {
				c.logger.Printf("Error sending response: %v", err)
				// Non-blocking: keep at most one parked Send error.
				select {
				case errChan <- err:
				default:
				}
				streamCancel()
				return
			}
		}
	}()

	// Main loop: receive requests and dispatch to workers. stream.Recv
	// returns when the peer closes the stream, when streamCtx is cancelled
	// (e.g. because the sender goroutine hit an error), or on a real
	// transport error.
	var recvErr error
recvLoop:
	for {
		req, err := stream.Recv()
		if err == io.EOF {
			c.logger.Println("Server closed the stream")
			break
		}
		if err != nil {
			recvErr = err
			break
		}

		c.logger.Printf("Received request: transaction_id=%s", req.TransactionId)

		select {
		case c.workChan <- req:
		case <-streamCtx.Done():
			recvErr = streamCtx.Err()
			break recvLoop
		}
	}

	// Tear down in-flight workers. Cancelling streamCtx was already done
	// via defer; close(workChan) lets the worker goroutines exit their
	// range loop cleanly. A fresh workChan is installed for the next
	// Run() invocation after the workers have drained.
	close(c.workChan)
	c.wg.Wait()
	close(responseChan)
	c.workChan = make(chan *pb.SmartContractRequest, c.config.NumWorkers*2)

	if recvErr != nil {
		return fmt.Errorf("error receiving request: %w", recvErr)
	}

	// Surface any earlier Send error the sender goroutine parked on
	// errChan so the reconnect loop sees it.
	select {
	case err := <-errChan:
		return fmt.Errorf("stream send error: %w", err)
	default:
		return nil
	}
}

// worker processes requests from the work channel until the channel is
// closed or ctx is cancelled.
func (c *Client) worker(ctx context.Context, responseChan chan<- *pb.SmartContractResponse) {
	defer c.wg.Done()
	for {
		select {
		case req, ok := <-c.workChan:
			if !ok {
				return
			}
			c.processRequest(ctx, req, responseChan)
		case <-ctx.Done():
			return
		}
	}
}

// processRequest handles a single request: it invokes the user-defined
// Process function and forwards the resulting response to responseChan
// (or drops it if ctx is cancelled first).
func (c *Client) processRequest(ctx context.Context, req *pb.SmartContractRequest, responseChan chan<- *pb.SmartContractResponse) {
	// Capture logs (in production, you might want a more sophisticated logging approach).
	var logs string

	// Call the user-defined Process function.
	result := Process(ctx, req.TransactionJson, req.EnvVars, req.Secrets)

	// Build the response.
	resp := &pb.SmartContractResponse{
		TransactionId: req.TransactionId,
		OutputToChain: result.OutputToChain,
		Logs:          logs,
	}

	if result.Error != nil {
		resp.Error = result.Error.Error()
		c.logger.Printf("Error processing transaction %s: %v", req.TransactionId, result.Error)
	} else {
		// Marshal the result data to JSON.
		resultJSON, err := json.Marshal(result.Data)
		if err != nil {
			resp.Error = fmt.Sprintf("failed to marshal result: %v", err)
			c.logger.Printf("Error marshaling result for transaction %s: %v", req.TransactionId, err)
		} else {
			resp.ResultJson = string(resultJSON)
			c.logger.Printf("Successfully processed transaction %s", req.TransactionId)
		}
	}

	select {
	case responseChan <- resp:
	case <-ctx.Done():
	}
}

// LoadConfig loads configuration from a YAML file, applies defaults,
// and validates the required fields.
func LoadConfig(path string) (*Config, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("failed to read config file: %w", err)
	}

	config := &Config{
		NumWorkers:         10,
		ReconnectDelaySecs: 3,
		MaxBackoffSeconds:  120,
	}
	if err := yaml.Unmarshal(data, config); err != nil {
		return nil, fmt.Errorf("failed to parse config file: %w", err)
	}

	// A zero or negative worker count (e.g. an explicit `num_workers: 0`
	// in the YAML) would start no worker goroutines and leave the
	// dispatch channel with zero capacity, stalling the Recv loop
	// forever. Fall back to the default instead of deadlocking.
	if config.NumWorkers <= 0 {
		config.NumWorkers = 10
	}

	// Validate required fields.
	if config.ServerAddress == "" {
		return nil, fmt.Errorf("server_address is required")
	}
	if config.ChainID == "" {
		return nil, fmt.Errorf("chain_id is required")
	}
	if config.SmartContractID == "" {
		return nil, fmt.Errorf("smart_contract_id is required")
	}
	if config.APIKey == "" {
		return nil, fmt.Errorf("api_key is required")
	}

	return config, nil
}

// nextBackoff returns the duration to sleep before the next reconnect.
// Computed as base * 2^attempts with a random jitter in [0, base) and
// capped at MaxBackoffSeconds. Jitter matters when many clients
// reconnect simultaneously after a server restart — it desynchronises
// them so they don't all slam accept() at the same instant.
func nextBackoff(cfg *Config, attempts int) time.Duration {
	base := time.Duration(cfg.ReconnectDelaySecs) * time.Second
	if base <= 0 {
		base = 3 * time.Second
	}
	maxBackoff := time.Duration(cfg.MaxBackoffSeconds) * time.Second
	if maxBackoff <= 0 {
		maxBackoff = 120 * time.Second
	}

	// Cap the exponent so we don't overflow. 2^10 = 1024 ≈ always
	// clipped by maxBackoff anyway, but keep the math bounded.
	shift := attempts
	if shift > 10 {
		shift = 10
	}
	delay := base << shift
	if delay > maxBackoff {
		delay = maxBackoff
	}

	// Jitter range == base, independent of attempts. Adding it ensures
	// we don't schedule a thundering herd on the next attempt even if
	// every client started with the same `attempts` count.
	jitter := time.Duration(rand.Int63n(int64(base)))
	return delay + jitter
}

func main() {
	configPath := flag.String("config", "config.yaml", "Path to configuration file")
	flag.Parse()

	// Load configuration.
	config, err := LoadConfig(*configPath)
	if err != nil {
		log.Fatalf("Failed to load config: %v", err)
	}

	// Create client.
	client := NewClient(config)

	// Setup signal handling for graceful shutdown.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		sig := <-sigChan
		log.Printf("Received signal %v, shutting down...", sig)
		cancel()
	}()

	// Connection loop with reconnection logic. A "successful" session
	// is defined as one where Run() was active for at least 60 s —
	// anything longer is almost certainly real work, so reset attempts
	// so the next failure starts the backoff schedule fresh.
	attempts := 0
	const healthyRunThreshold = 60 * time.Second

reconnect:
	for {
		if err := client.Connect(); err != nil {
			log.Printf("Connection failed: %v", err)
		} else {
			start := time.Now()
			if err := client.Run(ctx); err != nil {
				if ctx.Err() != nil {
					log.Println("Shutdown requested")
					_ = client.Close()
					break
				}
				log.Printf("Stream error: %v", err)
			}
			if time.Since(start) > healthyRunThreshold {
				attempts = 0
			}
		}
		_ = client.Close()

		// Check if we should stop reconnecting.
		if ctx.Err() != nil {
			break
		}

		attempts++
		if config.MaxReconnectAttempts > 0 && attempts >= config.MaxReconnectAttempts {
			log.Printf("Max reconnection attempts (%d) reached, exiting", config.MaxReconnectAttempts)
			break
		}

		delay := nextBackoff(config, attempts-1)
		log.Printf("Reconnecting in %v (attempt %d)...", delay, attempts)
		select {
		case <-time.After(delay):
		case <-ctx.Done():
			// Break (not return) so the shutdown message below is
			// printed on this path too, matching every other exit.
			break reconnect
		}
	}

	log.Println("Client shut down")
}