🐹 🔍 Distributed Tracing & Observability with TuskLang & Go

Introduction

Distributed tracing is the key to understanding complex microservice architectures. TuskLang and Go let you implement comprehensive observability with config-driven tracing, metrics, and logging that span your entire system.

Key Features

- OpenTelemetry integration
- Trace propagation across services
- Span management and correlation
- Metrics collection and aggregation
- Log correlation with traces
- Sampling strategies
- Trace visualization and analysis

Example: Tracing Config

[tracing]
backend: jaeger
endpoint: @env("JAEGER_ENDPOINT")
sampling_rate: @env("SAMPLING_RATE", 0.1)
service_name: @env("SERVICE_NAME")
metrics: @metrics("trace_duration_ms", 0)
correlation: @go("tracing.CorrelateLogs")

Go: OpenTelemetry Setup

package tracing

import ( "context" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/exporters/jaeger" "go.opentelemetry.io/otel/sdk/resource" "go.opentelemetry.io/otel/sdk/trace" "go.opentelemetry.io/otel/semconv/v1.4.0" )

func InitTracer(serviceName, endpoint string) (*trace.TracerProvider, error) { exp, err := jaeger.New(jaeger.WithCollectorEndpoint(jaeger.WithEndpoint(endpoint))) if err != nil { return nil, err } tp := trace.NewTracerProvider( trace.WithBatcher(exp), trace.WithResource(resource.NewWithAttributes( semconv.SchemaURL, semconv.ServiceNameKey.String(serviceName), )), ) otel.SetTracerProvider(tp) return tp, nil }
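
Typical usage is a sketch like the following: initialize once at startup and flush buffered spans on shutdown. The service name and endpoint values are illustrative; in practice they come from the [tracing] config above.

package main

import (
    "context"
    "log"
    "time"
)

func main() {
    // InitTracer is the function above; the import path for the
    // tracing package depends on your module layout.
    tp, err := InitTracer("order-service", "http://jaeger:14268/api/traces")
    if err != nil {
        log.Fatalf("init tracer: %v", err)
    }

    // Flush any buffered spans before the process exits.
    defer func() {
        ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
        defer cancel()
        if err := tp.Shutdown(ctx); err != nil {
            log.Printf("tracer shutdown: %v", err)
        }
    }()

    // ... start your HTTP server here ...
}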

Trace Propagation

// PropagateTrace extracts the incoming trace context and starts a server
// span. The caller must call span.End() once the request has been handled;
// deferring End here would close the span before any work runs against the
// returned context.
func PropagateTrace(ctx context.Context, req *http.Request) (context.Context, trace.Span) {
    // Extract trace context from incoming headers
    ctx = otel.GetTextMapPropagator().Extract(ctx, propagation.HeaderCarrier(req.Header))

    // Create a new span for this request
    tracer := otel.Tracer("http-server")
    ctx, span := tracer.Start(ctx, "http-request")

    // Add request attributes
    span.SetAttributes(
        attribute.String("http.method", req.Method),
        attribute.String("http.url", req.URL.String()),
        attribute.String("http.user_agent", req.UserAgent()),
    )

    return ctx, span
}
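
A sketch of how PropagateTrace might sit in HTTP middleware, so every handler runs inside the extracted trace context; TracingMiddleware is an illustrative name.

func TracingMiddleware(next http.Handler) http.Handler {
    return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        ctx, span := PropagateTrace(r.Context(), r)
        defer span.End() // close the request span once the handler returns

        next.ServeHTTP(w, r.WithContext(ctx))
    })
}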

Span Management

func ProcessOrder(ctx context.Context, orderID string) error {
    tracer := otel.Tracer("order-service")
    
    ctx, span := tracer.Start(ctx, "process-order")
    defer span.End()
    
    span.SetAttributes(attribute.String("order.id", orderID))
    
    // Validate order
    if err := validateOrder(ctx, orderID); err != nil {
        span.RecordError(err)
        span.SetStatus(codes.Error, err.Error())
        return err
    }
    
    // Process payment
    if err := processPayment(ctx, orderID); err != nil {
        span.RecordError(err)
        span.SetStatus(codes.Error, err.Error())
        return err
    }
    
    // Update inventory
    if err := updateInventory(ctx, orderID); err != nil {
        span.RecordError(err)
        span.SetStatus(codes.Error, err.Error())
        return err
    }
    
    span.SetStatus(codes.Ok, "Order processed successfully")
    return nil
}

func validateOrder(ctx context.Context, orderID string) error {
    tracer := otel.Tracer("order-service")
    _, span := tracer.Start(ctx, "validate-order")
    defer span.End()

    // Validation logic
    time.Sleep(10 * time.Millisecond)
    return nil
}

Metrics Collection

package metrics

import ( "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/metric/instrument" )

type MetricsCollector struct { requestCounter instrument.Int64Counter requestDuration instrument.Float64Histogram errorCounter instrument.Int64Counter }

func NewMetricsCollector(meter metric.Meter) (*MetricsCollector, error) { requestCounter, err := meter.Int64Counter( "http_requests_total", instrument.WithDescription("Total number of HTTP requests"), ) if err != nil { return nil, err } requestDuration, err := meter.Float64Histogram( "http_request_duration_seconds", instrument.WithDescription("HTTP request duration"), ) if err != nil { return nil, err } errorCounter, err := meter.Int64Counter( "http_errors_total", instrument.WithDescription("Total number of HTTP errors"), ) if err != nil { return nil, err } return &MetricsCollector{ requestCounter: requestCounter, requestDuration: requestDuration, errorCounter: errorCounter, }, nil }

func (m *MetricsCollector) RecordRequest(method, path string, duration time.Duration, err error) { m.requestCounter.Add(context.Background(), 1, attribute.String("method", method), attribute.String("path", path), ) m.requestDuration.Record(context.Background(), duration.Seconds(), attribute.String("method", method), attribute.String("path", path), ) if err != nil { m.errorCounter.Add(context.Background(), 1, attribute.String("method", method), attribute.String("path", path), ) } }
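
To tie the collector to a request path, here is a sketch of an instrumenting middleware. InstrumentHandler is an illustrative name, and error extraction is left as nil since it depends on how your handlers surface failures.

import (
    "net/http"
    "time"
)

func InstrumentHandler(mc *MetricsCollector, next http.Handler) http.Handler {
    return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        start := time.Now()
        next.ServeHTTP(w, r)

        // Record count and latency for every request; pass a non-nil
        // error if your handler surfaces one, to feed the error counter.
        mc.RecordRequest(r.Method, r.URL.Path, time.Since(start), nil)
    })
}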

Log Correlation

func CorrelateLogs(ctx context.Context, msg string, fields ...log.Field) {
    span := trace.SpanFromContext(ctx)
    spanContext := span.SpanContext()
    
    // Add trace and span IDs to log
    fields = append(fields,
        log.String("trace_id", spanContext.TraceID().String()),
        log.String("span_id", spanContext.SpanID().String()),
    )
    
    logger.Info(ctx, msg, fields...)
}
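
The logger and log.Field types above are placeholders for whatever structured logging library you use. As one concrete option, here is the same correlation sketched with the standard library's log/slog (Go 1.21+):

import (
    "context"
    "log/slog"

    "go.opentelemetry.io/otel/trace"
)

func CorrelateLogsSlog(ctx context.Context, msg string, args ...any) {
    spanContext := trace.SpanFromContext(ctx).SpanContext()

    // Attach trace and span IDs so log lines can be joined with traces.
    args = append(args,
        slog.String("trace_id", spanContext.TraceID().String()),
        slog.String("span_id", spanContext.SpanID().String()),
    )
    slog.InfoContext(ctx, msg, args...)
}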

Sampling Strategies

type SamplingStrategy interface {
    ShouldSample(ctx context.Context, traceID trace.TraceID, name string, kind trace.SpanKind) bool
}

type AdaptiveSampling struct {
    baseRate   float64
    errorRate  float64
    latencyP95 time.Duration
}

func (a *AdaptiveSampling) ShouldSample(ctx context.Context, traceID trace.TraceID, name string, kind trace.SpanKind) bool {
    // Increase sampling for errors
    if hasError(ctx) {
        return rand.Float64() < a.baseRate*2
    }

    // Increase sampling for slow requests
    if getLatency(ctx) > a.latencyP95 {
        return rand.Float64() < a.baseRate*1.5
    }

    return rand.Float64() < a.baseRate
}
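
Note that the OpenTelemetry SDK expects its own sdktrace.Sampler interface rather than the SamplingStrategy shown here; a minimal adapter might look like the sketch below (hasError and getLatency remain application-specific helpers, as above).

import (
    sdktrace "go.opentelemetry.io/otel/sdk/trace"
)

type strategySampler struct {
    strategy SamplingStrategy
}

func (s strategySampler) ShouldSample(p sdktrace.SamplingParameters) sdktrace.SamplingResult {
    // Delegate the decision to the strategy above.
    if s.strategy.ShouldSample(p.ParentContext, p.TraceID, p.Name, p.Kind) {
        return sdktrace.SamplingResult{Decision: sdktrace.RecordAndSample}
    }
    return sdktrace.SamplingResult{Decision: sdktrace.Drop}
}

func (s strategySampler) Description() string { return "adaptive-sampling" }

// Wire it into the provider:
// tp := sdktrace.NewTracerProvider(
//     sdktrace.WithSampler(strategySampler{strategy: &AdaptiveSampling{baseRate: 0.1}}),
// )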

Performance Monitoring

func MonitorPerformance(ctx context.Context, operation string, fn func() error) error {
    tracer := otel.Tracer("performance")
    ctx, span := tracer.Start(ctx, operation)
    defer span.End()
    
    start := time.Now()
    err := fn()
    duration := time.Since(start)
    
    // Record performance metrics
    metrics.RecordDuration(operation, duration)
    
    if err != nil {
        span.RecordError(err)
        span.SetStatus(codes.Error, err.Error())
    } else {
        span.SetStatus(codes.Ok, "Operation completed successfully")
    }
    
    return err
}
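
For example, wrapping a single unit of work (loadUser and userID are illustrative placeholders):

err := MonitorPerformance(ctx, "load-user", func() error {
    return loadUser(ctx, userID) // any operation you want timed and traced
})
if err != nil {
    // handle or propagate; the span and metrics were already recorded
}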

Best Practices

- Use consistent span naming conventions
- Add relevant attributes to spans
- Implement proper error handling and status codes
- Use sampling to control trace volume
- Correlate logs with trace IDs
- Monitor trace performance impact

Performance Optimization

- Use async span processing for high-throughput systems
- Implement intelligent sampling strategies
- Batch trace exports to reduce overhead (see the sketch after this list)
- Use efficient serialization for trace data
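
For the batching point, the SDK's batch span processor exposes tuning options that can be passed to trace.WithBatcher; the values here are illustrative, not recommendations, and exp is the Jaeger exporter from InitTracer above.

tp := trace.NewTracerProvider(
    trace.WithBatcher(exp,
        trace.WithMaxExportBatchSize(512),     // spans per export request
        trace.WithBatchTimeout(5*time.Second), // max wait before exporting a partial batch
        trace.WithMaxQueueSize(2048),          // buffered spans before new ones are dropped
    ),
)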

Security Considerations

- Sanitize sensitive data in traces (see the sketch after this list)
- Implement trace access controls
- Use secure connections for trace exporters
- Comply with data privacy regulations
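
For the first point, one lightweight approach is to scrub values before they ever become span attributes. This helper is an illustrative sketch; it drops query strings and fragments, which frequently carry tokens and PII.

import "net/url"

// sanitizeURL strips query parameters and fragments before the URL is
// recorded as a span attribute, since they often carry tokens or PII.
func sanitizeURL(raw string) string {
    u, err := url.Parse(raw)
    if err != nil {
        return "invalid-url"
    }
    u.RawQuery = ""
    u.Fragment = ""
    return u.String()
}

// Usage:
// span.SetAttributes(attribute.String("http.url", sanitizeURL(req.URL.String())))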

Troubleshooting

- Monitor trace sampling rates
- Check trace exporter connectivity
- Verify span correlation across services
- Monitor trace storage performance

Conclusion

TuskLang + Go = observability that spans your entire system. See everything, understand everything.