本文最后更新于3 天前,其中的信息可能已经过时,如有错误请发送邮件到2647369456@qq.com
部署链路追踪系统
LOOK
本人更推荐 jaeger
不过 Jaeger 有一个问题:日志(追踪数据)保存在本地,Jaeger 服务宕机并重新启动后,日志可能出现混乱。经过排查,原因是其中一个组件服务重启失败;由于时间有限,没有进一步定位到具体问题,于是换了一个链路收集系统。
当然,也可以把数据保存在 Elasticsearch(ES)中,这样查询会更快,但 ES 比较消耗资源。
tempo系统部署
Tempo 的主要功能是:接收采集到的链路追踪数据、压缩数据、存储数据,并提供查询接口。
tempo-values.yaml
# Global settings shared by all components.
global:
  image:
    registry: docker.io
  clusterDomain: cluster.local
# Base Tempo configuration.
# Everything under structuredConfig uses Tempo's own configuration keys.
tempo:
  structuredConfig:
    compactor: {}
    distributor: {}
    ingester: {}
    server:
      http_listen_port: 3100
    storage:
      trace:
        backend: s3  # use S3-compatible object storage
        s3:
          bucket: tempo  # MinIO bucket name
          endpoint: loki-minio.logging.svc:9000  # MinIO service address
          # NOTE(review): the credentials below are stored in plain text;
          # consider sourcing them from a Kubernetes Secret instead.
          access_key: root-user  # MinIO access key, as set when Loki was deployed
          secret_key: supersecretpassword  # MinIO secret key, as set when Loki was deployed
          insecure: true
          forcepathstyle: true  # use path-style bucket addressing
# Ingester settings: three replicas, each with a persistent volume.
ingester:
  replicas: 3
  persistence:
    enabled: true
    inMemory: false
    size: 10Gi
    storageClass: "nfs-client"
    annotations: {}
# Distributor settings: two replicas, with logging of received spans on.
distributor:
  replicas: 2
  config:
    log_received_spans:
      enabled: true
      include_all_attributes: false
# Compactor settings: a single replica with a persistent volume.
compactor:
  replicas: 1
  persistence:
    enabled: true
    inMemory: false
    size: 50Gi  # raised from 10Gi to 50Gi
    storageClass: "nfs-client"
    annotations: {}
  config:
    compaction:
      block_retention: 48h
      compaction_window: 1h
      max_block_bytes: 107374182400  # 100 GiB (100 * 1024^3 bytes)
# Query frontend settings.
queryFrontend:
  replicas: 2
  query:
    # The tempo-query (Jaeger UI) component is switched off here;
    # set this to true to enable it.
    enabled: false
    # NOTE(review): this text is handed over as the tempo-query config file;
    # confirm these keys are ones tempo-query actually understands.
    config: |
      max_concurrent: 2000
      max_traces_per_tenant: 10000
      max_lookback_delta: 1h
    extraArgs:
      # NOTE(review): flag and value share one list item, so they are
      # presumably passed as a single argument - verify this is intended.
      - "-config /conf/tempo-query.yaml"
      - "--query.base-path="  # empty value to override the default
  config:
    max_outstanding_per_tenant: 2000
    max_retries: 2
    search:
      concurrent_jobs: 1000
    trace_by_id:
      query_shards: 50
# Resource requests and limits, keyed by component.
# NOTE(review): the tempo-distributed chart appears to expect resources under
# each component (e.g. `ingester.resources`); confirm that this top-level map
# is actually read, otherwise these limits are silently ignored.
resources:
  ingester:
    limits:
      cpu: 1
      memory: 1Gi
    requests:
      cpu: 500m
      memory: 512Mi
  distributor:
    limits:
      cpu: 1
      memory: 1Gi
    requests:
      cpu: 500m
      memory: 512Mi
  compactor:
    limits:
      cpu: 1
      memory: 1Gi
    requests:
      cpu: 500m
      memory: 512Mi
  query_frontend:
    limits:
      cpu: 1
      memory: 1Gi
    requests:
      cpu: 500m
      memory: 512Mi
# RBAC settings: create RBAC resources, without PodSecurityPolicy.
rbac:
  create: true
  pspEnabled: false
# ServiceAccount settings: create one, with no extra annotations.
serviceAccount:
  create: true
  annotations: {}
# Metrics generator settings.
metricsGenerator:
  enabled: true
  config:
    registry:
      collection_interval: 15s
    processor:
      service_graphs:
        dimensions: []
        histogram_buckets: [0.1, 0.2, 0.4, 0.8, 1.6, 3.2, 6.4, 12.8]
      span_metrics:
        dimensions: []
# Search toggle.
# NOTE(review): confirm the chart version in use still reads this top-level key.
search_enabled: true
# Trace retention: keep traces for 48 hours.
# NOTE(review): compactor.config.compaction.block_retention is also set to 48h;
# confirm the chart reads this top-level block and that both are needed.
retention:
  enabled: true
  duration: 48h
# Liveness and readiness probes.
# NOTE(review): confirm the chart reads these top-level probe blocks; they may
# need to be set per component instead.
livenessProbe:
  enabled: true
  initialDelaySeconds: 30
  periodSeconds: 10
readinessProbe:
  enabled: true
  initialDelaySeconds: 30
  periodSeconds: 10
# Trace receivers exposed for incoming spans.
traces:
  otlp:
    grpc:
      enabled: true  # enable the OTLP gRPC receiver
    http:
      enabled: true  # optional: also enable the OTLP HTTP receiver
Helm 命令
# Register the Grafana chart repository.
helm repo add grafana https://grafana.github.io/helm-charts
# Refresh the local chart index.
helm repo update
# Install Tempo (distributed mode) into the "logging" namespace,
# using the values file shown above.
helm install tempo grafana/tempo-distributed \
  --namespace logging \
  --values tempo-values.yaml