logging: remove loki and backfill to opensearch
parent 456677cfbb
commit 0b78ec663d
services/logging/fluent-bit-helmrelease.yaml
@@ -33,6 +33,10 @@ spec:
         - name: varlogjournal
           hostPath:
             path: /var/log/journal
+        - name: fluentbit-state
+          hostPath:
+            path: /var/lib/fluent-bit
+            type: DirectoryOrCreate
       extraVolumeMounts:
         - name: runlogjournal
           mountPath: /run/log/journal
@@ -40,6 +44,8 @@ spec:
         - name: varlogjournal
           mountPath: /var/log/journal
           readOnly: true
+        - name: fluentbit-state
+          mountPath: /var/lib/fluent-bit
       config:
         service: |
           [SERVICE]
@@ -51,6 +57,10 @@ spec:
              HTTP_Server               On
              HTTP_Listen               0.0.0.0
              HTTP_Port                 2020
+             storage.path              /var/lib/fluent-bit/storage
+             storage.sync              normal
+             storage.checksum          on
+             storage.backlog.mem_limit 50M
         inputs: |
           [INPUT]
              Name               tail
@@ -63,14 +73,17 @@ spec:
              Refresh_Interval   10
              Rotate_Wait        30
              Inotify_Watcher    false
-             storage.type       memory
+             Read_from_Head     On
+             DB                 /var/lib/fluent-bit/kube.db
+             storage.type       filesystem

           [INPUT]
              Name               systemd
              Tag                journald.*
              Path               /var/log/journal
-             Read_From_Tail     On
-             storage.type       memory
+             Read_From_Tail     Off
+             DB                 /var/lib/fluent-bit/systemd.db
+             storage.type       filesystem
         filters: |
           [FILTER]
              Name               kubernetes
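Note: the Fluent Bit change above swaps both inputs from in-memory buffering to filesystem storage backed by the new hostPath volume, and turns on Read_from_Head so existing files are re-read to backfill OpenSearch now that Loki is gone. A rough way to confirm the agent is healthy after rollout is the built-in HTTP server the [SERVICE] block already exposes on port 2020; the DaemonSet name below is an assumption about what the chart creates, and /api/v1/storage only reports chunk counts if storage.metrics is also enabled:

    # assumes the chart creates a DaemonSet named "fluent-bit" in the logging namespace
    kubectl -n logging port-forward daemonset/fluent-bit 2020:2020 &
    curl -s http://127.0.0.1:2020/api/v1/metrics    # per-input/output record and error counters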
services/logging/kustomization.yaml
@@ -6,7 +6,8 @@ resources:
   - opensearch-helmrelease.yaml
   - opensearch-dashboards-helmrelease.yaml
   - opensearch-ism-job.yaml
+  - opensearch-dashboards-setup-job.yaml
+  - opensearch-prune-cronjob.yaml
   - fluent-bit-helmrelease.yaml
-  - loki-helmrelease.yaml
   - oauth2-proxy.yaml
   - ingress.yaml
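The kustomization now references the two new manifests and drops the Loki HelmRelease. A sanity check before Flux reconciles, assuming the overlay lives at services/logging in the repo:

    # render the overlay locally; loki should no longer appear, the new Job/CronJob should
    kubectl kustomize services/logging | grep -E '^kind:|^  name:'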
services/logging/loki-helmrelease.yaml (deleted)
@@ -1,113 +0,0 @@
-# services/logging/loki-helmrelease.yaml
-apiVersion: helm.toolkit.fluxcd.io/v2
-kind: HelmRelease
-metadata:
-  name: loki
-  namespace: logging
-spec:
-  interval: 15m
-  chart:
-    spec:
-      chart: loki
-      version: "~6.6.0"
-      sourceRef:
-        kind: HelmRepository
-        name: grafana
-        namespace: flux-system
-  values:
-    fullnameOverride: loki
-    deploymentMode: SingleBinary
-    loki:
-      auth_enabled: false
-      commonConfig:
-        replication_factor: 1
-      storage:
-        type: filesystem
-      storageConfig:
-        filesystem:
-          directory: /var/loki/chunks
-        tsdb_shipper:
-          active_index_directory: /var/loki/index
-          cache_location: /var/loki/index_cache
-      schemaConfig:
-        configs:
-          - from: "2024-01-01"
-            store: tsdb
-            object_store: filesystem
-            schema: v13
-            index:
-              prefix: loki_index_
-              period: 24h
-      compactor:
-        working_directory: /var/loki/compactor
-        retention_enabled: true
-        delete_request_store: filesystem
-      limits_config:
-        retention_period: 4320h
-        reject_old_samples: true
-        reject_old_samples_max_age: 168h
-    read:
-      replicas: 0
-    write:
-      replicas: 0
-    backend:
-      replicas: 0
-    singleBinary:
-      replicas: 1
-      affinity:
-        nodeAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-            nodeSelectorTerms:
-              - matchExpressions:
-                  - key: hardware
-                    operator: In
-                    values:
-                      - rpi5
-                      - rpi4
-      persistence:
-        enabled: true
-        size: 200Gi
-        storageClass: asteria
-    gateway:
-      affinity:
-        nodeAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-            nodeSelectorTerms:
-              - matchExpressions:
-                  - key: hardware
-                    operator: In
-                    values:
-                      - rpi5
-                      - rpi4
-    chunksCache:
-      allocatedMemory: 512
-      affinity:
-        nodeAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-            nodeSelectorTerms:
-              - matchExpressions:
-                  - key: hardware
-                    operator: In
-                    values:
-                      - rpi5
-                      - rpi4
-    resultsCache:
-      allocatedMemory: 256
-      affinity:
-        nodeAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-            nodeSelectorTerms:
-              - matchExpressions:
-                  - key: hardware
-                    operator: In
-                    values:
-                      - rpi5
-                      - rpi4
-    lokiCanary:
-      nodeSelector:
-        hardware: rpi5
-        node-role.kubernetes.io/worker: "true"
-    service:
-      type: ClusterIP
-    ingress:
-      enabled: false
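Deleting the HelmRelease lets Flux uninstall the chart, but the 200Gi data PVC created by its persistence block is normally left behind by Helm and needs manual cleanup once the OpenSearch backfill is confirmed. A hedged sketch; the claim name depends on the chart's StatefulSet naming:

    kubectl -n logging get pvc | grep -i loki       # list leftover claims, if any
    kubectl -n logging delete pvc storage-loki-0    # name is an assumption; use what the list shows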
services/logging/oauth2-proxy.yaml
@@ -55,6 +55,7 @@ spec:
         - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas
         - --scope=openid profile email
         - --email-domain=*
+        - --code-challenge-method=S256
         - --set-xauthrequest=true
         - --pass-access-token=true
         - --set-authorization-header=true
services/logging/opensearch-dashboards-setup-job.yaml (new file, 63 lines)
@@ -0,0 +1,63 @@
+# services/logging/opensearch-dashboards-setup-job.yaml
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: opensearch-dashboards-setup-1
+  namespace: logging
+spec:
+  backoffLimit: 3
+  ttlSecondsAfterFinished: 3600
+  template:
+    spec:
+      restartPolicy: OnFailure
+      nodeSelector:
+        node-role.kubernetes.io/worker: "true"
+        hardware: rpi5
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+              - matchExpressions:
+                  - key: hardware
+                    operator: In
+                    values:
+                      - rpi5
+      containers:
+        - name: setup
+          image: alpine:3.20
+          command: ["/bin/sh", "-c"]
+          args:
+            - |
+              set -euo pipefail
+              apk add --no-cache curl >/dev/null
+
+              OSD_URL="http://opensearch-dashboards.logging.svc.cluster.local:5601"
+              for attempt in $(seq 1 60); do
+                code="$(curl -s -o /dev/null -w "%{http_code}" "${OSD_URL}/api/status" || true)"
+                if [ "${code}" = "200" ]; then
+                  break
+                fi
+                sleep 5
+              done
+
+              if ! curl -s -o /dev/null -w "%{http_code}" "${OSD_URL}/api/status" | grep -q "200"; then
+                echo "OpenSearch Dashboards did not become ready in time" >&2
+                exit 1
+              fi
+
+              create_view() {
+                view_id="$1"
+                title="$2"
+                curl -sS -X POST "${OSD_URL}/api/saved_objects/index-pattern/${view_id}?overwrite=true" \
+                  -H 'Content-Type: application/json' \
+                  -H 'osd-xsrf: true' \
+                  -d "{\"attributes\":{\"title\":\"${title}\",\"timeFieldName\":\"@timestamp\"}}" >/dev/null
+              }
+
+              create_view kube-logs "kube-*"
+              create_view journald-logs "journald-*"
+
+              curl -sS -X POST "${OSD_URL}/api/opensearch-dashboards/settings" \
+                -H 'Content-Type: application/json' \
+                -H 'osd-xsrf: true' \
+                -d '{"changes":{"defaultIndex":"kube-logs"}}' >/dev/null
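Because the saved objects are posted with overwrite=true the job is idempotent and can be re-run by bumping the "-1" suffix in its name. Whether the index patterns actually landed can be checked from any pod inside the cluster via the Dashboards saved-objects API:

    # run in-cluster (or via a port-forward to the opensearch-dashboards service)
    OSD_URL="http://opensearch-dashboards.logging.svc.cluster.local:5601"
    curl -s "${OSD_URL}/api/saved_objects/_find?type=index-pattern" | grep -o '"title":"[^"]*"'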
services/logging/opensearch-helmrelease.yaml
@@ -32,7 +32,7 @@ spec:
     persistence:
       enabled: true
       storageClass: asteria
-      size: 500Gi
+      size: 1024Gi
     config:
       opensearch.yml: |
         cluster.name: opensearch
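Growing the claim from 500Gi to 1024Gi only resizes the existing volume in place if the asteria StorageClass allows expansion; otherwise the PVC would have to be recreated. A quick check (label selectors omitted, so this lists all claims in the namespace):

    kubectl get storageclass asteria -o jsonpath='{.allowVolumeExpansion}{"\n"}'
    kubectl -n logging get pvc -o custom-columns=NAME:.metadata.name,CAPACITY:.status.capacity.storage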
services/logging/opensearch-prune-cronjob.yaml (new file, 132 lines)
@@ -0,0 +1,132 @@
+# services/logging/opensearch-prune-cronjob.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: opensearch-prune-script
+  namespace: logging
+data:
+  prune.py: |
+    import json
+    import os
+    import re
+    import sys
+    import urllib.error
+    import urllib.request
+
+    os_url = os.environ.get("OPENSEARCH_URL", "http://opensearch-master.logging.svc.cluster.local:9200").rstrip("/")
+    limit_bytes = int(os.environ.get("LOG_LIMIT_BYTES", str(1024**4)))
+    patterns = [p.strip() for p in os.environ.get("LOG_INDEX_PATTERNS", "kube-*,journald-*").split(",") if p.strip()]
+
+    UNITS = {
+        "b": 1,
+        "kb": 1024,
+        "mb": 1024**2,
+        "gb": 1024**3,
+        "tb": 1024**4,
+    }
+
+    def parse_size(value: str) -> int:
+        if not value:
+            return 0
+        text = value.strip().lower()
+        if text in ("-", "0"):
+            return 0
+        match = re.match(r"^([0-9.]+)([a-z]+)$", text)
+        if not match:
+            return 0
+        number = float(match.group(1))
+        unit = match.group(2)
+        if unit not in UNITS:
+            return 0
+        return int(number * UNITS[unit])
+
+    def request_json(path: str):
+        url = f"{os_url}{path}"
+        with urllib.request.urlopen(url, timeout=30) as response:
+            payload = response.read().decode("utf-8")
+        return json.loads(payload)
+
+    def delete_index(index: str) -> None:
+        url = f"{os_url}/{index}"
+        req = urllib.request.Request(url, method="DELETE")
+        with urllib.request.urlopen(req, timeout=30) as response:
+            _ = response.read()
+        print(f"deleted {index}")
+
+    indices = []
+    for pattern in patterns:
+        try:
+            data = request_json(f"/_cat/indices/{pattern}?format=json&h=index,store.size,creation.date")
+        except urllib.error.HTTPError as exc:
+            if exc.code == 404:
+                continue
+            raise
+        for item in data:
+            index = item.get("index")
+            if not index or index.startswith("."):
+                continue
+            size = parse_size(item.get("store.size", ""))
+            created = int(item.get("creation.date", "0") or 0)
+            indices.append({"index": index, "size": size, "created": created})
+
+    total = sum(item["size"] for item in indices)
+    print(f"total_log_bytes={total}")
+    if total <= limit_bytes:
+        print("within limit")
+        sys.exit(0)
+
+    indices.sort(key=lambda item: item["created"])
+    for item in indices:
+        if total <= limit_bytes:
+            break
+        delete_index(item["index"])
+        total -= item["size"]
+
+    print(f"remaining_log_bytes={total}")
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: opensearch-prune
+  namespace: logging
+spec:
+  schedule: "23 3 * * *"
+  concurrencyPolicy: Forbid
+  successfulJobsHistoryLimit: 1
+  failedJobsHistoryLimit: 3
+  jobTemplate:
+    spec:
+      backoffLimit: 2
+      template:
+        spec:
+          restartPolicy: OnFailure
+          nodeSelector:
+            node-role.kubernetes.io/worker: "true"
+            hardware: rpi5
+          affinity:
+            nodeAffinity:
+              requiredDuringSchedulingIgnoredDuringExecution:
+                nodeSelectorTerms:
+                  - matchExpressions:
+                      - key: hardware
+                        operator: In
+                        values:
+                          - rpi5
+          containers:
+            - name: prune
+              image: python:3.11-alpine
+              command: ["python", "/scripts/prune.py"]
+              env:
+                - name: OPENSEARCH_URL
+                  value: http://opensearch-master.logging.svc.cluster.local:9200
+                - name: LOG_LIMIT_BYTES
+                  value: "1099511627776"
+                - name: LOG_INDEX_PATTERNS
+                  value: "kube-*,journald-*"
+              volumeMounts:
+                - name: scripts
+                  mountPath: /scripts
+          volumes:
+            - name: scripts
+              configMap:
+                name: opensearch-prune-script
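The prune logic deletes the oldest matching indices until total size drops back under LOG_LIMIT_BYTES (1 TiB here), skipping system indices that start with a dot. The CronJob only fires nightly at 03:23, so after merging it can be exercised immediately by spawning a one-off Job from it and following the script's output:

    kubectl -n logging create job --from=cronjob/opensearch-prune opensearch-prune-manual
    kubectl -n logging logs -f job/opensearch-prune-manual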
Grafana HelmRelease values (datasources)
@@ -320,13 +320,6 @@ spec:
             timeInterval: "15s"
           uid: atlas-vm
           orgId: 2
-        - name: Loki
-          type: loki
-          access: proxy
-          url: http://loki.logging.svc.cluster.local:3100
-          isDefault: false
-          uid: atlas-loki
-          orgId: 1
       dashboardProviders:
         dashboardproviders.yaml:
           apiVersion: 1