Observing code in production with OpenTelemetry (OTEL)
while True:
try:
x = int(input("Enter a number: "))
break
except ValueError:
print("Oops!")
Photo by Altered Vision on Unsplash
Photo by Oliver Paaske on Unsplash
E0419 06:59:50.681167 1 reflector.go:138] pkg/mod/k8s.io/client-go@v0.22.4/tools/cache/reflector.go:167: Failed to watch *v1.Endpoints: the server has asked for the client to provide credentials (get endpoints)
time="2024-04-17T20:10:41Z" level=info msg="successfully synced configuration to kong." subsystem=proxy-cache-resolver
get '/posts' do
Datadog::Tracing.trace('web.request', service: 'my-blog', resource: 'GET /posts') do |span|
# Trace the activerecord call
Datadog::Tracing.trace('posts.fetch') do
@posts = Posts.order(created_at: :desc).limit(10)
end
# Add some APM tags
span.set_tag('http.method', request.request_method)
span.set_tag('posts.count', @posts.length)
# Trace the template rendering
Datadog::Tracing.trace('template.render') do
erb :index
end
end
end
OTel captures traces, metrics, logs, and other application telemetry data and lets you send it to the backend of your choice.
// main.go
func main() {
http.HandleFunc("/rolldice", rolldice)
log.Fatal(http.ListenAndServe(":8000", nil))
}
// rolldice.go
func rolldice(w http.ResponseWriter, r *http.Request) {
roll := 1 + rand.Intn(6)
// Sleep for a few seconds, so we can see longer traces
// And breakout the slow api call versus other work
// that might be done in this function, i.e. a DB query
time.Sleep(time.Millisecond * time.Duration(roll*100))
log.Println("simulating a call to slow api")
err := simulateSlowAPI(500 * roll)
if err != nil {
log.Printf("Failed to make call to simulate slow api")
}
resp := strconv.Itoa(roll) + "\n"
if _, err := io.WriteString(w, resp); err != nil {
log.Printf("Write failed: %v\n", err)
}
}
// rolldice.go
// A function for simulating slowness with an external http call
func simulateSlowAPI(sleepInMilliseconds int) error {
// Construct the URL with the sleep parameter
baseURL := "https://fakeresponder.com/"
sleepParam := "?sleep=" + strconv.Itoa(sleepInMilliseconds)
// Create the full URL
fullURL := baseURL + sleepParam
// Make the GET request
resp, err := http.Get(fullURL)
if err != nil {
log.Println("Error making the GET request:", err)
return err
}
// > go run .
2024/04/24 22:03:11 simulating a call to slow api
2024/04/24 22:03:16 simulating a call to slow api
2024/04/24 22:03:20 simulating a call to slow api
2024/04/24 22:03:23 simulating a call to slow api
// > watch -n 2 'curl -s localhost:8000/rolldice
Every 2.0s: curl -s localhost:8000/rolldice
4
## TODO: find out what's slow
_Assuming we didn't already know_ :)
- [ ] Add more logs
- [ ] Add metrics (response time, number of calls, etc)
*OR*
- [x] Add OTel
// main.go
import "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
// Set up OpenTelemetry.
otelShutdown, err := setupOTelSDK(ctx)
if err != nil {
return
}
// Handle shutdown properly so nothing leaks.
defer func() {
err = errors.Join(err, otelShutdown(context.Background()))
}()
...
handleFunc := func(pattern string, handlerFunc func(http.ResponseWriter, *http.Request)) {
// Configure the "http.route" for the HTTP instrumentation.
handler := otelhttp.WithRouteTag(pattern, http.HandlerFunc(handlerFunc))
mux.Handle(pattern, handler)
handler := otelhttp.NewHandler(mux, "/")
// otel.go
func setupOTelSDK(ctx context.Context) (shutdown func(context.Context) error, err error) {
// Set up trace provider.
tracerProvider, err := newTraceProvider()
if err != nil {
handleErr(err)
return
}
shutdownFuncs = append(shutdownFuncs, tracerProvider.Shutdown)
otel.SetTracerProvider(tracerProvider)
case "stdout":
exporter, err = stdouttrace.New(
stdouttrace.WithPrettyPrint()
)
// rolldice.go
var (
tracer = otel.Tracer("rolldice")
meter = otel.Meter("rolldice")
rollCnt metric.Int64Counter
)
func rolldice(w http.ResponseWriter, r *http.Request) {
ctx, span := tracer.Start(r.Context(), "rolling dice")
defer span.End()
roll := 1 + rand.Intn(6)
// Sleep for a few seconds, so we can see longer traces
time.Sleep(time.Millisecond * time.Duration(roll*100))
span.AddEvent("simulating a call to slow api")
err := simulateSlowAPI(500*roll, ctx)
if err != nil {
log.Printf("Failed to make call to simulate slow api")
}
// rolldice.go
func simulateSlowAPI(sleepInMilliseconds int, ctx context.Context) error {
ctx, childSpan := tracer.Start(ctx, "simulatedSlowApiSpan")
defer childSpan.End()
// Construct the URL with the sleep parameter
baseURL := "https://fakeresponder.com/"
sleepParam := "?sleep=" + strconv.Itoa(sleepInMilliseconds)
(base) ➜ otel-dice-example git:(otel-with-stdout) ✗ go run .
{"Resource":[
{"Key":"service.name","Value":
{"Type":"STRING","Value":"dice"}
},
{"Key":"service.version","Value":
{"Type":"STRING","Value":"0.1.0"}
},
{"Key":"telemetry.sdk.language","Value":
{"Type":"STRING","Value":"go"}
},
{"Key":"telemetry.sdk.name","Value":
{"Type":"STRING","Value":"opentelemetry"}
},
{"Key":"telemetry.sdk.version","Value":
{"Type":"STRING","Value":"1.25.0"}
}
],"ScopeMetrics":[]}
{"Resource":[{"Key":"service.name","Value":{"Type":"STRING","Value":"dice"}},{"Key":"service.version","Value":{"Type":"STRING","Value":"0.1.0"}},{"Key":"telemetry.sdk.language","Value":{"Type":"STRING","Value":"go"}},{"Key":"telemetry.sdk.name","Value":{"Type":"STRING","Value":"opentelemetry"}},{"Key":"telemetry.sdk.version","Value":{"Type":"STRING","Value":"1.25.0"}}],"ScopeMetrics":[]}
{
"Name": "/",
"SpanContext": {
"TraceID": "5a844fd338c4c00ec001fb28ea352c64",
"SpanID": "4908426582a52298",
},
"Parent": {
"TraceID": "00000000000000000000000000000000",
"SpanID": "0000000000000000",
},
"SpanKind": 2,
"StartTime": "2024-04-25T00:21:00.179788-06:00",
"EndTime": "2024-04-25T00:21:01.471713625-06:00",
"Attributes": [
{
"Key": "http.route",
"Value": {
"Type": "STRING",
"Value": "/rolldice"
}
},
{
"Name": "rolling dice",
"SpanContext": {
"TraceID": "5a844fd338c4c00ec001fb28ea352c64",
"SpanID": "7e394450317027c5",
},
"Parent": {
"TraceID": "5a844fd338c4c00ec001fb28ea352c64",
"SpanID": "4908426582a52298",
},
"Attributes": [
{
"Key": "roll.value",
"Value": {
"Type": "INT64",
"Value": 2
}
}
],
> docker run -d --name jaeger \
-e COLLECTOR_OTLP_ENABLED=true \
-p 16686:16686 \
-p 4317:4317 \
-p 4318:4318 \
jaegertracing/all-in-one:latest
> OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 go run .
// otel.go
func createOtelExporter(exporterType string) (trace.SpanExporter, error) {
case "otlp":
var opts []otlptracehttp.Option
exporter, err = otlptrace.New(
context.Background(),
otlptracehttp.NewClient(opts...),
)
case "stdout":
exporter, err = stdouttrace.New(stdouttrace.WithPrettyPrint())
Win a print copy of Observability Engineering
Observability
@virtualandy
Photo by Frederick Marschall on Unsplash