| 명령어 | 설명 |
|---|---|
| sudo systemctl start datadog-agent | 에이전트 시작 |
| sudo systemctl stop datadog-agent | 에이전트 중지 |
| sudo systemctl restart datadog-agent | 에이전트 재시작 |
| sudo datadog-agent status | 상태 확인 |
| sudo datadog-agent health | 헬스 체크 |
| sudo datadog-agent configcheck | 설정 확인 |
| sudo datadog-agent flare | 지원 플레어 생성 |
# /etc/datadog-agent/datadog.yaml
# Minimal agent configuration: API key, site, hostname, host tags,
# log collection, APM, and process monitoring.
api_key: YOUR_API_KEY  # replace with a real key; do not commit secrets to VCS
site: datadoghq.com

# Hostname reported to Datadog (overrides auto-detection)
hostname: my-server

# Tags applied to everything emitted from this host
tags:
  - env:production
  - service:web
  - team:backend

# Logs
logs_enabled: true

# APM
apm_config:
  enabled: true
  apm_dd_url: https://trace.agent.datadoghq.com

# Process monitoring
process_config:
  process_collection:
    enabled: true

from datadog import initialize, statsd
initialize(statsd_host="localhost", statsd_port=8125)
# Count
statsd.increment("page.views")
statsd.increment("user.signups", tags=["plan:free"])
# Gauge
statsd.gauge("queue.size", 42)
statsd.gauge("memory.used", 1024, tags=["host:web-1"])
# Histogram
statsd.histogram("request.duration", 0.5)
# Distribution
statsd.distribution("payment.amount", 99.99)
# Set (count unique values)
statsd.set("users.unique", user_id)
# Timing
from datadog import statsd
@statsd.timed("function.duration")
def my_function():
    pass

const StatsD = require("hot-shots");
// DogStatsD metric submission from Node.js (npm install hot-shots).
const dogstatsd = new StatsD({
  host: "localhost",
  port: 8125,
  prefix: "myapp.",                  // prepended to every metric name
  globalTags: { env: "production" }, // attached to every metric
});

// Count
dogstatsd.increment("page.views");
dogstatsd.increment("api.calls", 1, { endpoint: "/users" });

// Gauge
dogstatsd.gauge("queue.size", 42);

// Histogram
dogstatsd.histogram("request.duration", 150);

// Timer
// NOTE(review): hot-shots documents timer(fn, stat) as a function wrapper and
// timing(stat, ms) for raw durations; a start/stop timer object is not part of
// its documented API — verify .stop() works with the installed version.
const timer = dogstatsd.timer("function.duration");
// ... do work
timer.stop();

# Install: pip install ddtrace
# Run: ddtrace-run python app.py
from ddtrace import tracer, patch_all
# Auto-instrument libraries
patch_all()
# Manual span
with tracer.trace("my.operation", service="my-service") as span:
span.set_tag("user.id", user_id)
# Do work
result = do_something()
span.set_tag("result.count", len(result))
# Decorator
@tracer.wrap(service="my-service", resource="process_order")
def process_order(order_id):
    pass

// Install: npm install dd-trace
// Require at top of entry file
const tracer = require("dd-trace").init({
service: "my-service",
env: "production",
});
// Manual span
const span = tracer.startSpan("my.operation");
span.setTag("user.id", userId);
try {
// Do work
} finally {
span.finish();
}
// With scope
tracer.trace("my.operation", { service: "my-service" }, (span) => {
span.setTag("custom.tag", "value");
return doSomething();
});

# /etc/datadog-agent/conf.d/python.d/conf.yaml
logs:
  # Tail log files on disk
  - type: file
    path: /var/log/myapp/*.log
    service: my-service
    source: python
    tags:
      - env:production
  # Collect container stdout/stderr
  - type: docker
    service: my-container
    source: docker
  # Listen for logs over TCP
  - type: tcp
    port: 10514
    service: my-service
    source: custom

import logging
import json_log_formatter
# Setup JSON formatter
formatter = json_log_formatter.JSONFormatter()
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger = logging.getLogger()
logger.addHandler(handler)
logger.setLevel(logging.INFO)
# Log with attributes
logger.info("User logged in", extra={
"user_id": user_id,
"dd.trace_id": tracer.current_span().trace_id,
"dd.span_id": tracer.current_span().span_id,
})

resource "datadog_monitor" "cpu_high" {
name = "High CPU Usage"
type = "metric alert"
message = "CPU usage is high on {{host.name}}. @slack-alerts"
query = "avg(last_5m):avg:system.cpu.user{env:production} by {host} > 80"
thresholds = {
critical = 80
warning = 70
}
notify_no_data = true
no_data_timeframe = 10
tags = ["env:production", "team:backend"]
}
# Alert when the APM error rate exceeds 5% over the last 5 minutes.
resource "datadog_monitor" "error_rate" {
  name    = "High Error Rate"
  type    = "query alert"
  message = "Error rate exceeded threshold. @pagerduty"

  # errors / hits * 100 → percentage
  query = "sum(last_5m):sum:trace.http.request.errors{env:production}.as_count() / sum:trace.http.request.hits{env:production}.as_count() * 100 > 5"

  # NOTE(review): as above, newer providers expect `monitor_thresholds`.
  thresholds = {
    critical = 5
    warning  = 2
  }
}