diff --git a/gitea/deployments/gitea-db.yaml b/gitea/deployments/gitea-db.yaml index e7659af..ada1363 100644 --- a/gitea/deployments/gitea-db.yaml +++ b/gitea/deployments/gitea-db.yaml @@ -15,7 +15,7 @@ spec: spec: containers: - name: mysql - image: mysql:8 + image: mysql:latest env: - name: MYSQL_ROOT_PASSWORD value: gitea123 diff --git a/gitea/deployments/gitea.yaml b/gitea/deployments/gitea.yaml index 327d56e..1b1b9d2 100644 --- a/gitea/deployments/gitea.yaml +++ b/gitea/deployments/gitea.yaml @@ -32,7 +32,7 @@ spec: - name: GITEA__database__USER value: "gitea" - name: GITEA__database__PASSWD - value: "gitea123" + value: "gitea" volumeMounts: - name: gitea-data mountPath: /data diff --git a/gitea/kustomization.yaml b/gitea/kustomization.yaml index d55b114..5405bc3 100644 --- a/gitea/kustomization.yaml +++ b/gitea/kustomization.yaml @@ -1,7 +1,5 @@ resources: - namespace.yaml - - pvc/gitea-data.yaml - - pvc/gitea-db.yaml - deployments/gitea.yaml - deployments/gitea-db.yaml - services/gitea.yaml diff --git a/gitea/services/gitea.yaml b/gitea/services/gitea.yaml index e5c111d..c817a75 100644 --- a/gitea/services/gitea.yaml +++ b/gitea/services/gitea.yaml @@ -4,11 +4,9 @@ metadata: name: gitea namespace: gitea spec: - type: NodePort selector: app: gitea ports: - name: http port: 3000 targetPort: 3000 - nodePort: 30300 diff --git a/rook/cluster/ceph-cluster-final-final.yaml b/rook/cluster/ceph-cluster-final-final.yaml new file mode 100644 index 0000000..7e925b9 --- /dev/null +++ b/rook/cluster/ceph-cluster-final-final.yaml @@ -0,0 +1,180 @@ +apiVersion: ceph.rook.io/v1 +kind: CephCluster +metadata: + name: rook-ceph + namespace: rook-ceph +spec: + cephVersion: + image: quay.io/ceph/ceph:v18 + cleanupPolicy: + wipeDevicesFromOtherClusters: true + sanitizeDisks: + method: quick + dataSource: zero + crashCollector: {} + csi: + cephfs: {} + readAffinity: + enabled: false + dashboard: + enabled: true + dataDirHostPath: /var/lib/rook + disruptionManagement: {} + external: {} + healthCheck: + daemonHealth: + mon: {} + osd: {} + status: {} + logCollector: {} + mgr: + count: 2 + mon: + allowMultiplePerNode: false + count: 3 + monitoring: {} + network: + multiClusterService: {} + placement: + mgr: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: topology.kubernetes.io/zone + operator: In + values: + - site-a + - site-b + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - rook-ceph-mgr + topologyKey: kubernetes.io/hostname + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: rook-ceph-mgr + maxSkew: 1 + topologyKey: topology.kubernetes.io/zone + whenUnsatisfiable: DoNotSchedule + mon: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: topology.kubernetes.io/zone + operator: In + values: + - site-a + - site-b + - arbiter + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: rook-ceph-mon + maxSkew: 1 + topologyKey: topology.kubernetes.io/zone + whenUnsatisfiable: DoNotSchedule + security: + cephx: + csi: {} + daemon: {} + rbdMirrorPeer: {} + keyRotation: + enabled: false + kms: {} + storage: + flappingRestartIntervalHours: 0 + migration: {} + nodes: + - devices: + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d5bb177a1716 + - config: + deviceClass: ssd + fullpath: 
/dev/disk/by-id/wwn-0x64cd98f036d94b003012d5dc196bd3a7 + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d5f81b10f7ef + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d6151cca8afd + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d62f1e5e9699 + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d64f204b2405 + name: srvfkvm01 + resources: {} + - devices: + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030127eef88828273 + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030127f879197de32 + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030128081a076ba0c + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030128114a93e33b9 + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94300301281a7b1fc151a + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030128235ba79d801 + name: srvfkvm02 + resources: {} + - devices: + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128aef3bb4e0ae + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b0e3d8bc1dc + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b2b3f446dd7 + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b4440c2d027 + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b5e42510c2a + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b7d442e592c + name: srvfkvm03 + resources: {} + - devices: + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c003012887ebfca6752 + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c0030128896e360075f + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288ac038600d4 + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288c62acb6efc + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288e456c6d441 + - config: + deviceClass: ssd + fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288f976534b4f + name: srvfkvm04 + resources: {} + store: {} + useAllDevices: false diff --git a/rook/cluster/ceph-cluster-final.yaml b/rook/cluster/ceph-cluster-final.yaml new file mode 100644 index 0000000..bb0aaf4 --- /dev/null +++ b/rook/cluster/ceph-cluster-final.yaml @@ -0,0 +1,133 @@ +apiVersion: ceph.rook.io/v1 +kind: CephCluster +metadata: + annotations: {} + finalizers: + - cephcluster.ceph.rook.io + name: rook-ceph + namespace: rook-ceph +spec: + cephVersion: + image: quay.io/ceph/ceph:v18 + cleanupPolicy: + sanitizeDisks: {} + wipeDevicesFromOtherClusters: false + crashCollector: {} + csi: + cephfs: {} + readAffinity: + enabled: false + dashboard: + enabled: true + dataDirHostPath: /var/lib/rook + disruptionManagement: {} + external: {} + healthCheck: + daemonHealth: + mon: {} + osd: {} + status: {} + logCollector: {} + mgr: + count: 2 + mon: + allowMultiplePerNode: false + count: 3 + monitoring: {} + network: + multiClusterService: {} + placement: + mgr: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: topology.kubernetes.io/zone + operator: In + values: + - site-a + - site-b + podAntiAffinity: + 
requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - rook-ceph-mgr + topologyKey: kubernetes.io/hostname + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: rook-ceph-mgr + maxSkew: 1 + topologyKey: topology.kubernetes.io/zone + whenUnsatisfiable: DoNotSchedule + mon: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: topology.kubernetes.io/zone + operator: In + values: + - site-a + - site-b + - arbiter + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: rook-ceph-mon + maxSkew: 1 + topologyKey: topology.kubernetes.io/zone + whenUnsatisfiable: DoNotSchedule + security: + cephx: + csi: {} + daemon: {} + rbdMirrorPeer: {} + keyRotation: + enabled: false + kms: {} + storage: + flappingRestartIntervalHours: 0 + migration: {} + nodes: + - devices: + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d5bb177a1716 + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d5dc196bd3a7 + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d5f81b10f7ef + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d6151cca8afd + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d62f1e5e9699 + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d64f204b2405 + name: srvfkvm01 + resources: {} + - devices: + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030127eef88828273 + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030127f879197de32 + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030128081a076ba0c + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030128114a93e33b9 + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94300301281a7b1fc151a + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030128235ba79d801 + name: srvfkvm02 + resources: {} + - devices: + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128aef3bb4e0ae + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b0e3d8bc1dc + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b2b3f446dd7 + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b4440c2d027 + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b5e42510c2a + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b7d442e592c + name: srvfkvm03 + resources: {} + - devices: + - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c003012887ebfca6752 + - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c0030128896e360075f + - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288ac038600d4 + - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288c62acb6efc + - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288e456c6d441 + - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288f976534b4f + name: srvfkvm04 + resources: {} + store: {} + useAllDevices: false diff --git a/rook/cluster/ceph-cluster-inicial.yaml b/rook/cluster/ceph-cluster-inicial.yaml index 9616319..5f67a21 100644 --- a/rook/cluster/ceph-cluster-inicial.yaml +++ b/rook/cluster/ceph-cluster-inicial.yaml @@ -8,25 +8,50 @@ spec: image: quay.io/ceph/ceph:v19.2.3 dataDirHostPath: /var/lib/rook + # Redes: pública por VLAN 40 (4.0), cluster por VLAN 30 (3.0) network: provider: host addressRanges: public: - "192.168.4.0/24" cluster: - - "192.168.4.0/24" - mgr: - count: 2 + - "192.168.3.0/24" mon: - count: 3 + count: 5 allowMultiplePerNode: false + mgr: + count: 2 + dashboard: enabled: true - # Evita OSDs en el futuro nodo árbitro (cuando lo añadas) placement: + # Permite programar en nodos con taint de control-plane + all: + tolerations: + - 
key: "node-role.kubernetes.io/control-plane" + operator: "Exists" + effect: "NoSchedule" + + # MON: fija 1 por cada host 01–05 y evita colocalizar + mon: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: ["srvfkvm01","srvfkvm02","srvfkvm03","srvfkvm04","srvfkvm05"] + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: rook-ceph-mon + topologyKey: kubernetes.io/hostname + + # OSD: solo en zonas de datos (evita árbitro) osd: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: @@ -36,6 +61,7 @@ spec: operator: In values: ["site-a","site-b"] + # Preferencia de MGR (opcional) mgr: nodeAffinity: preferredDuringSchedulingIgnoredDuringExecution: @@ -46,6 +72,9 @@ spec: operator: In values: ["srvfkvm01","srvfkvm04"] + cleanupPolicy: + wipeDevicesFromOtherClusters: true + storage: useAllNodes: false useAllDevices: false @@ -85,3 +114,4 @@ spec: - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288c62acb6efc - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288e456c6d441 - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288f976534b4f + diff --git a/rook/cluster/ceph-cluster.yaml b/rook/cluster/ceph-cluster.yaml new file mode 100644 index 0000000..612592e --- /dev/null +++ b/rook/cluster/ceph-cluster.yaml @@ -0,0 +1,87 @@ +apiVersion: ceph.rook.io/v1 +kind: CephCluster +metadata: + annotations: {} + finalizers: + - cephcluster.ceph.rook.io + name: rook-ceph + namespace: rook-ceph +spec: + cephVersion: + image: quay.io/ceph/ceph:v18 + cleanupPolicy: + sanitizeDisks: {} + wipeDevicesFromOtherClusters: false + crashCollector: {} + csi: + cephfs: {} + readAffinity: + enabled: false + dashboard: + enabled: true + dataDirHostPath: /var/lib/rook + disruptionManagement: {} + external: {} + healthCheck: + daemonHealth: + mon: {} + osd: {} + status: {} + logCollector: {} + mgr: + count: 1 + mon: + count: 3 + monitoring: {} + network: + multiClusterService: {} + security: + cephx: + csi: {} + daemon: {} + rbdMirrorPeer: {} + keyRotation: + enabled: false + kms: {} + storage: + flappingRestartIntervalHours: 0 + migration: {} + nodes: + - devices: + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d5bb177a1716 + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d5dc196bd3a7 + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d5f81b10f7ef + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d6151cca8afd + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d62f1e5e9699 + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d64f204b2405 + name: srvfkvm01 + resources: {} + - devices: + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030127eef88828273 + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030127f879197de32 + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030128081a076ba0c + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030128114a93e33b9 + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94300301281a7b1fc151a + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030128235ba79d801 + name: srvfkvm02 + resources: {} + - devices: + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128aef3bb4e0ae + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b0e3d8bc1dc + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b2b3f446dd7 + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b4440c2d027 + - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b5e42510c2a + - fullpath: 
/dev/disk/by-id/wwn-0x64cd98f036d9510030128b7d442e592c
+      name: srvfkvm03
+      resources: {}
+    - devices:
+      - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c003012887ebfca6752
+      - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c0030128896e360075f
+      - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288ac038600d4
+      - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288c62acb6efc
+      - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288e456c6d441
+      - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288f976534b4f
+      name: srvfkvm04
+      resources: {}
+    store: {}
+    useAllDevices: false
diff --git a/rook/cluster/inicio.yaml b/rook/cluster/inicio.yaml
new file mode 100644
index 0000000..e3cc6e8
--- /dev/null
+++ b/rook/cluster/inicio.yaml
@@ -0,0 +1,31 @@
+apiVersion: ceph.rook.io/v1
+kind: CephCluster
+metadata:
+  name: rook-ceph
+  namespace: rook-ceph
+spec:
+  cephVersion:
+    # use the same version you were already running
+    image: quay.io/ceph/ceph:v18
+  dataDirHostPath: /var/lib/rook
+  mon:
+    count: 3
+    allowMultiplePerNode: false
+  mgr:
+    count: 1
+  dashboard:
+    enabled: true
+  crashCollector:
+    disable: false
+  storage:
+    useAllNodes: false
+    useAllDevices: false
+    nodes:
+      - name: srvfkvm01
+        devices:
+          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d5bb177a1716
+          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d5dc196bd3a7
+          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d5f81b10f7ef
+          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d6151cca8afd
+          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d62f1e5e9699
+          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d64f204b2405
diff --git a/rook/ingress/dashboard.yaml b/rook/ingress/dashboard.yaml
index 7650b51..1b8b08d 100644
--- a/rook/ingress/dashboard.yaml
+++ b/rook/ingress/dashboard.yaml
@@ -5,8 +5,8 @@ metadata:
   namespace: rook-ceph
   annotations:
     cert-manager.io/cluster-issuer: letsencrypt-prod
-    nginx.ingress.kubernetes.io/backend-protocol: "HTTPS"
-    nginx.ingress.kubernetes.io/whitelist-source-range: "192.168.200.0/24,192.168.0.0/24,10.244.0.0/16,192.168.4.0/24"
+    nginx.ingress.kubernetes.io/backend-protocol: "HTTP"
+#    nginx.ingress.kubernetes.io/whitelist-source-range: "192.168.200.0/24,192.168.0.0/24,10.244.0.0/16,192.168.4.0/24"
 spec:
   ingressClassName: nginx
   tls:
diff --git a/rook/pools/ceph-blockpool-rbd-2x2.yaml b/rook/pools/ceph-blockpool-rbd-2x2.yaml
deleted file mode 100644
index 15f89cd..0000000
--- a/rook/pools/ceph-blockpool-rbd-2x2.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-apiVersion: ceph.rook.io/v1
-kind: CephBlockPool
-metadata:
-  name: rbd-2x2-sites
-  namespace: rook-ceph
-spec:
-  failureDomain: zone
-  replicated:
-    size: 4
diff --git a/rook/pools/ceph-blockpool-rbd.yaml b/rook/pools/ceph-blockpool-rbd.yaml
new file mode 100644
index 0000000..e0750d6
--- /dev/null
+++ b/rook/pools/ceph-blockpool-rbd.yaml
@@ -0,0 +1,18 @@
+apiVersion: ceph.rook.io/v1
+kind: CephBlockPool
+metadata:
+  name: rbd-2x2-sites
+  namespace: rook-ceph
+spec:
+  deviceClass: ssd
+  failureDomain: zone
+  replicated:
+    size: 4
+    replicasPerFailureDomain: 2
+    subFailureDomain: host
+    requireSafeReplicaSize: true
+  parameters:
+    pg_autoscale_mode: "on"
+    min_size: "2"
+  mirroring:
+    enabled: false
diff --git a/rook/readme.md b/rook/readme.md
index 188af51..285cf7f 100644
--- a/rook/readme.md
+++ b/rook/readme.md
@@ -1,60 +1,26 @@
-# Deploying Rook‑Ceph on a **Kubernetes** cluster (SUSE) with local disks (Bluestore)
+# Deploying **Rook‑Ceph** on Kubernetes (SUSE) with 2 zones + an **arbiter**
 
-> Updated guide for a **Kubernetes** cluster (not K3s) on SUSE, with 4 initial nodes and a **future expansion to stretch mode** with a fifth **arbiter** node. Local disks (RAID/HBA), dedicated storage network **VLAN 30 – 192.168.3.0/24**, and the dashboard exposed **through Ingress NGINX** with TLS.
+> Guide based on the **current** state of the cluster (A/B + *arbiter*), with no previous "no-arbiter" phase. Local disks (Bluestore), distribution by **zone**, 3 MON (one per zone) and 2 MGR (one on site A, one on site B). RBD pool with **size=4** (2+2 per zone) and **min_size=2**.
 
 ---
 
-## 1) Prerequisites
+## 1) Topology and requirements
 
-* 4 operational Kubernetes nodes: `srvfkvm01`, `srvfkvm02`, `srvfkvm03`, `srvfkvm04` (control-plane or mixed)
-* Each node with **6 dedicated disks** (~894 GB) for Ceph
-* Internet access from the nodes
-* Dedicated storage network **VLAN 30 – 192.168.3.0/24** (Ceph public/cluster)
-* `kubectl` configured and admin permissions
+* Nodes and zones:
 
-> **Version note**: examples tested with Rook 1.17.x and Ceph v19.x (Squid) or v18.x (Reef). The manifests use a stable image.
+  * **site-a**: `srvfkvm01`, `srvfkvm02`
+  * **site-b**: `srvfkvm03`, `srvfkvm04`
+  * **arbiter**: `srvfkvm05` *(no OSDs)*
+* Each data node has **6 disks** dedicated to Ceph (use persistent `/dev/disk/by-id/...` paths).
+* Internet access from the nodes. `kubectl` with admin permissions.
+* Versions used: **Rook v1.18.x**, **Ceph v18 (Reef)**.
+
+> **Resilience goal**: tolerate the complete loss of one site (A **or** B). The arbiter hosts MON (and optionally MGR), **no** OSDs.
 
 ---
-## 2) Prepare the disks on SUSE (data disks only)
-
-Install the required utilities on **every node**:
-
-```bash
-sudo zypper -n install gdisk util-linux
-```
-
-Safely wipe **only** `sdb…sdg` (adjust if your layout differs):
-
-```bash
-set -euo pipefail
-DISKS=(sdb sdc sdd sde sdf sdg)
-
-for d in "${DISKS[@]}"; do
-  echo ">>> /dev/$d"
-  sudo sgdisk --zap-all /dev/$d || true   # wipe GPT/MBR
-  sudo wipefs -a /dev/$d || true          # remove FS/LVM signatures
-  sudo blkdiscard -f /dev/$d || \         # TRIM (if supported)
-    sudo dd if=/dev/zero of=/dev/$d bs=1M count=10 oflag=direct,dsync
-done
-```
-
-Get the **persistent** *by-id* paths for each disk (on each node):
-
-```bash
-for d in sdb sdc sdd sde sdf sdg; do
-  echo "=== $HOSTNAME -> $d ==="
-  ls -l /dev/disk/by-id/ | awk -v d="$d" '$NF ~ ("/" d "$") {print "/dev/disk/by-id/"$9}'
-done
-```
-
-> **Always use** `/dev/disk/by-id/...` in the manifests (`fullpath:` field) to avoid device-letter changes.
-
----
-
-## 3) Label nodes by **site**
-
-We distribute across logical zones from the start (A/B). The arbiter will arrive later.
+## 2) Label nodes by **zone**
 
 ```bash
 # SITE A
 kubectl label node srvfkvm01 topology.kubernetes.io/zone=site-a --overwrite
 kubectl label node srvfkvm02 topology.kubernetes.io/zone=site-a --overwrite
 
 # SITE B
 kubectl label node srvfkvm03 topology.kubernetes.io/zone=site-b --overwrite
 kubectl label node srvfkvm04 topology.kubernetes.io/zone=site-b --overwrite
-```
-
-> When the **arbiter** node exists, it will be labeled `topology.kubernetes.io/zone=arbiter`.
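+
+A quick optional check that the labels landed where expected:
+
+```bash
+# one extra column per node showing its zone label
+kubectl get nodes -L topology.kubernetes.io/zone
+```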
+
+# ARBITER
+kubectl label node srvfkvm05 topology.kubernetes.io/zone=arbiter --overwrite
+```
 
 ---
 
+## 3) Prepare the disks (SUSE)
+
+Install utilities (on **each data node**):
+
+```bash
+sudo zypper -n install gdisk util-linux
+```
+
+Wipe them safely (adjust the IDs for each host):
+
+```bash
+# Generic example; use the real *by-id* paths of each node
+for d in \
+  /dev/disk/by-id/wwn-...a \
+  /dev/disk/by-id/wwn-...b \
+  /dev/disk/by-id/wwn-...c \
+  /dev/disk/by-id/wwn-...d \
+  /dev/disk/by-id/wwn-...e \
+  /dev/disk/by-id/wwn-...f; do
+  echo ">>> $d"
+  sudo wipefs -a "$d" || true
+  # First 100 MiB
+  sudo dd if=/dev/zero of="$d" bs=1M count=100 oflag=direct,dsync || true
+  # Last 100 MiB
+  real=$(readlink -f "$d"); dev=$(basename "$real")
+  sz=$(cat /sys/class/block/$dev/size); tail=$((100*1024*1024/512)); seek=$((sz - tail)); ((seek<0)) && seek=0
+  sudo dd if=/dev/zero of="$real" bs=512 seek="$seek" count="$tail" oflag=direct,dsync || true
+  sudo partprobe "$real" || true; sudo udevadm settle || true
+done
+```
+
+> **Tip**: keep the exact *by-id* paths of each node; they are the ones used in the `CephCluster`.
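+
+If you still need to collect those *by-id* paths, the small helper loop from the previous revision of this guide remains handy (it assumes the data disks are `sdb`–`sdg`; adjust as needed):
+
+```bash
+for d in sdb sdc sdd sde sdf sdg; do
+  echo "=== $HOSTNAME -> $d ==="
+  # print the /dev/disk/by-id name whose symlink target ends in this device
+  ls -l /dev/disk/by-id/ | awk -v d="$d" '$NF ~ ("/" d "$") {print "/dev/disk/by-id/"$9}'
+done
+```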
+
+---
+
+## 4) Install Rook (CRDs + operator)
 
 ```bash
 kubectl create namespace rook-ceph || true
 
-# Clone the official repo (optional, to get the toolbox/examples)
-git clone https://github.com/rook/rook.git
-cd rook/deploy/examples
+# CRDs + common + operator (Rook v1.18.x)
+kubectl apply -f https://raw.githubusercontent.com/rook/rook/v1.18.0/deploy/examples/crds.yaml \
+  -f https://raw.githubusercontent.com/rook/rook/v1.18.0/deploy/examples/common.yaml \
+  -f https://raw.githubusercontent.com/rook/rook/v1.18.0/deploy/examples/operator.yaml
 
-kubectl apply -f crds.yaml -f common.yaml -f operator.yaml
-```
-
-Check the operator:
-
-```bash
 kubectl -n rook-ceph get pods | grep operator
 ```
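+
+Optionally, wait until the operator reports ready before continuing (a simple gate, assuming the default deployment name):
+
+```bash
+kubectl -n rook-ceph wait deploy/rook-ceph-operator --for=condition=Available --timeout=300s
+kubectl get crd | grep -c 'ceph.rook.io'   # a non-zero count confirms the Ceph CRDs are registered
+```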
+
+> **Toolbox** (useful for diagnostics):
+>
+> ```bash
+> kubectl -n rook-ceph apply -f https://raw.githubusercontent.com/rook/rook/v1.18.0/deploy/examples/toolbox.yaml
+> ```
 
 ---
 
 ## 5) **CephCluster** manifest (A/B + arbiter, OSDs only on A/B)
 
-File `cluster/ceph-cluster.yaml`:
+File `cluster/ceph-cluster.yaml`, **adapted to your current environment**:
 
 ```yaml
 apiVersion: ceph.rook.io/v1
@@ -102,72 +105,103 @@ metadata:
   name: rook-ceph
   namespace: rook-ceph
 spec:
   cephVersion:
-    image: quay.io/ceph/ceph:v19.2.3   # stable (you can use v18.2.x if you prefer)
+    image: quay.io/ceph/ceph:v18
   dataDirHostPath: /var/lib/rook
 
-  # Network: host networking, restricted to the storage VLAN
-  network:
-    provider: host
-    addressRanges:
-      public:
-        - "192.168.3.0/24"
-      cluster:
-        - "192.168.3.0/24"
+  dashboard:
+    enabled: true
+
+  mgr:
+    count: 2
 
   mon:
     count: 3
     allowMultiplePerNode: false
 
-  dashboard:
-    enabled: true
-
-  # We do not want OSDs on the future arbiter node
   placement:
-    osd:
+    # MGR spread between site-a and site-b
+    mgr:
       nodeAffinity:
         requiredDuringSchedulingIgnoredDuringExecution:
           nodeSelectorTerms:
-          - matchExpressions:
-            - key: topology.kubernetes.io/zone
-              operator: In
-              values: ["site-a", "site-b"]
+            - matchExpressions:
+                - key: topology.kubernetes.io/zone
+                  operator: In
+                  values: ["site-a","site-b"]
+      podAntiAffinity:
+        requiredDuringSchedulingIgnoredDuringExecution:
+          - labelSelector:
+              matchExpressions:
+                - key: app
+                  operator: In
+                  values: ["rook-ceph-mgr"]
+            topologyKey: kubernetes.io/hostname
+      topologySpreadConstraints:
+        - labelSelector:
+            matchLabels:
+              app: rook-ceph-mgr
+          maxSkew: 1
+          topologyKey: topology.kubernetes.io/zone
+          whenUnsatisfiable: DoNotSchedule
+
+    # One MON per zone (site-a, site-b, arbiter)
+    mon:
+      nodeAffinity:
+        requiredDuringSchedulingIgnoredDuringExecution:
+          nodeSelectorTerms:
+            - matchExpressions:
+                - key: topology.kubernetes.io/zone
+                  operator: In
+                  values: ["site-a","site-b","arbiter"]
+      topologySpreadConstraints:
+        - labelSelector:
+            matchLabels:
+              app: rook-ceph-mon
+          maxSkew: 1
+          topologyKey: topology.kubernetes.io/zone
+          whenUnsatisfiable: DoNotSchedule
+
+  security:
+    cephx:
+      csi: {}
+      daemon: {}
+      rbdMirrorPeer: {}
 
   storage:
-    useAllNodes: false
     useAllDevices: false
     nodes:
       - name: srvfkvm01
        devices:
-          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d5bb177a1716
-          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d5dc196bd3a7
-          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d5f81b10f7ef
-          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d6151cca8afd
-          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d62f1e5e9699
-          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d64f204b2405
+          - { fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d5bb177a1716, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d5dc196bd3a7, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d5f81b10f7ef, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d6151cca8afd, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d62f1e5e9699, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94b003012d64f204b2405, config: {deviceClass: ssd}}
       - name: srvfkvm02
        devices:
-          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030127eef88828273
-          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030127f879197de32
-          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030128081a076ba0c
-          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030128114a93e33b9
-          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94300301281a7b1fc151a
-          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030128235ba79d801
+          - { fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030127eef88828273, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030127f879197de32, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030128081a076ba0c, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030128114a93e33b9, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x64cd98f036d94300301281a7b1fc151a, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9430030128235ba79d801, config: {deviceClass: ssd}}
       - name: srvfkvm03
        devices:
-          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128aef3bb4e0ae
-          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b0e3d8bc1dc
-          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b2b3f446dd7
-          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b4440c2d027
-          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b5e42510c2a
-          - fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b7d442e592c
+          - { fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128aef3bb4e0ae, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b0e3d8bc1dc, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b2b3f446dd7, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b4440c2d027, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b5e42510c2a, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x64cd98f036d9510030128b7d442e592c, config: {deviceClass: ssd}}
       - name: srvfkvm04
        devices:
-          - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c003012887ebfca6752
-          - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c0030128896e360075f
-          - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288ac038600d4
-          - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288c62acb6efc
-          - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288e456c6d441
-          - fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288f976534b4f
+          - { fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c003012887ebfca6752, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c0030128896e360075f, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288ac038600d4, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288c62acb6efc, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288e456c6d441, config: {deviceClass: ssd}}
+          - { fullpath: /dev/disk/by-id/wwn-0x6ec2a72037894c00301288f976534b4f, config: {deviceClass: ssd}}
 ```
 
 Apply and verify:
 
 ```bash
 kubectl apply -f cluster/ceph-cluster.yaml
 kubectl -n rook-ceph get pods
 ```
 
-> Install the **toolbox** for diagnostics: `kubectl -n rook-ceph apply -f rook/deploy/examples/toolbox.yaml`
+> **Note**: the MONs should land one in `site-a`, one in `site-b` and one in `arbiter`; the MGRs on `site-a` and `site-b`. OSDs only on A/B.
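+
+To see where the daemons actually landed (the Ceph-side view assumes the toolbox from step 4):
+
+```bash
+kubectl -n rook-ceph get pods -l app=rook-ceph-mon -o wide
+kubectl -n rook-ceph get pods -l app=rook-ceph-mgr -o wide
+kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph osd tree
+```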
 
 ---
 
-## 6) Initial RBD pool (replica **4** over **hosts**) + StorageClass
+## 6) Enable the **Orchestrator** (Rook backend)
 
-> With 2 sites (A/B) and **no** arbiter, do **not** use `failureDomain: zone` with `size: 4`, or the PGs will stay *undersized*. We start with **`host`** and, once **stretch** is enabled, we will move to `zone`.
+```bash
+kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph orch set backend rook
+kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph orch status
+```
+
+---
+
+## 7) **RBD** pool 2×2 per **zone** + StorageClass
 
 `pools/ceph-blockpool-rbd.yaml`:
 
 ```yaml
 apiVersion: ceph.rook.io/v1
 kind: CephBlockPool
 metadata:
   name: rbd-2x2-sites
   namespace: rook-ceph
 spec:
-  failureDomain: host
+  deviceClass: ssd
+  failureDomain: zone
   replicated:
-    size: 4
+    size: 4                       # 2 per site (A/B)
+    replicasPerFailureDomain: 2
+    subFailureDomain: host
+    requireSafeReplicaSize: true
+  parameters:
+    pg_autoscale_mode: "on"
+    min_size: "2"
 ```
 
 `storageclasses/rbd.yaml`:
 
 ```yaml
@@ -226,47 +274,25 @@
 allowVolumeExpansion: true
 mountOptions: ["discard"]
 ```
 
-Apply:
+Apply and check:
 
 ```bash
 kubectl apply -f pools/ceph-blockpool-rbd.yaml
 kubectl apply -f storageclasses/rbd.yaml
-kubectl get sc
+
+# Quick checks
+kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph osd pool get rbd-2x2-sites size
+kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph osd pool get rbd-2x2-sites min_size
+kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph osd crush rule dump rbd-2x2-sites -f json-pretty
 ```
 
-> If you created the pool with `failureDomain: zone` from the start and you see `active+undersized`, create and assign a host-level **CRUSH rule**:
->
-> ```bash
-> kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- bash -lc '
-> set -e
-> ceph osd crush rule create-replicated rbd-4x-host default host || true
-> ceph osd pool set rbd-2x2-sites crush_rule rbd-4x-host
-> ceph osd pool get rbd-2x2-sites crush_rule
-> '
-> ```
+> The generated CRUSH rule picks a **zone** first and then a **host** (2 replicas per zone). With OSDs only on A/B, the arbiter holds **no** data.
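+
+Two more optional sanity checks on the pool (replica layout and PG autoscaler state):
+
+```bash
+kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph osd pool ls detail
+kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph osd pool autoscale-status
+```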
 
 ---
 
-## 7) Mark OSDs as **SSD** (if Ceph detects them as HDD because of the HBA)
-
-```bash
-# From the toolbox
-kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- bash -lc '
-for id in $(ceph osd ls); do ceph osd crush rm-device-class osd.$id || true; done
-for id in $(ceph osd ls); do ceph osd crush set-device-class ssd osd.$id; done
-ceph osd tree | egrep "zone|host|osd."
-'
-```
-
-> If you later create an **SSD-only** pool, add `spec.deviceClass: ssd` to the `CephBlockPool`.
-
----
-
-## 8) Dashboard via **Ingress** (NGINX) at `ceph.c2et.net`
+## 8) Dashboard via **Ingress** (optional)
 
-> The MGR dashboard listens on **HTTP 7000** by default. We terminate **TLS at the Ingress** (cert-manager) and talk **HTTP** to the backend.
-
-`ingress/dashboard.yaml`:
+`ingress/dashboard.yaml` (backend HTTP:7000):
 
 ```yaml
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
   name: ceph-dashboard
   namespace: rook-ceph
   annotations:
-    cert-manager.io/cluster-issuer: letsencrypt-prod
     nginx.ingress.kubernetes.io/backend-protocol: "HTTP"
 spec:
   ingressClassName: nginx
-  tls:
-    - hosts: ["ceph.c2et.net"]
-      secretName: ceph-dashboard-tls
   rules:
-    - host: ceph.c2et.net
+    - host: ceph.example.local
       http:
         paths:
           - path: /
            pathType: Prefix
            backend:
              service:
                name: rook-ceph-mgr-dashboard
                port:
                  number: 7000
 ```
 
-Credentials:
+Admin password:
 
 ```bash
-# Default user
-admin
-
-# Generated password
-kubectl -n rook-ceph get secret rook-ceph-dashboard-password -o jsonpath="{.data.password}" | base64 -d; echo
-
-# Change the password (example)
-kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- bash -lc \
-'echo -n "MiNuevaPass" | ceph dashboard ac-user-set-password admin -i -'
+kubectl -n rook-ceph get secret rook-ceph-dashboard-password -o jsonpath='{.data.password}' | base64 -d; echo
 ```
 
-> If you prefer **HTTPS 8443** towards the backend as well, enable TLS on the Ceph dashboard and switch the Ingress to `backend-protocol: "HTTPS"` and port `8443` (optionally `proxy-ssl-verify: "off"`).
+Create the admin.c3s user (the built-in admin password tends to get reset):
+
+```bash
+kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- bash -lc \
+'echo -n "Pozuelo12345" | ceph dashboard ac-user-create admin.c3s administrator -i - && ceph dashboard ac-user-list'
+```
 
 ---
 
-## 9) Quick PVC test
+## 9) StorageClass test (PVC + Pod)
 
-`tests/pvc-test.yaml`:
+`tests/pvc.yaml`:
 
 ```yaml
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
   name: test-rbd
   namespace: default
 spec:
   accessModes: ["ReadWriteOnce"]
   resources:
     requests:
       storage: 5Gi
   storageClassName: ceph-rbd
 ```
 
-`tests/pod-test.yaml`:
+`tests/pod.yaml`:
 
 ```yaml
 apiVersion: v1
 kind: Pod
 metadata:
   name: rbd-tester
 spec:
   containers:
     - name: app
       image: busybox
       command: ["sh","-c","sleep 36000"]
       volumeMounts:
         - name: data
           mountPath: /data
   volumes:
     - name: data
       persistentVolumeClaim:
         claimName: test-rbd
 ```
 
-Apply and verify:
-
 ```bash
-kubectl apply -f tests/pvc-test.yaml
-kubectl apply -f tests/pod-test.yaml
+kubectl apply -f tests/pvc.yaml
+kubectl apply -f tests/pod.yaml
 kubectl exec -it rbd-tester -- sh -c 'df -h /data && dd if=/dev/zero of=/data/test.bin bs=1M count=100 && ls -lh /data'
 ```
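+
+Once the test looks good, the throw-away resources can be removed:
+
+```bash
+kubectl delete -f tests/pod.yaml -f tests/pvc.yaml
+```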
 
 ---
 
-## 10) **Future expansion**: **stretch** mode with an **arbiter** (2 sites + arbiter)
-
-Goal: survive the complete loss of one site, with replicas distributed **2+2** between `site-a` and `site-b`.
-
-1. **Add the arbiter node** and label it:
+## 10) Save the exact manifests from the cluster
 
 ```bash
-kubectl label node topology.kubernetes.io/zone=arbiter --overwrite
-```
+# "Clean" CephCluster, without ephemeral fields
+kubectl -n rook-ceph get cephcluster rook-ceph -o yaml --show-managed-fields=false \
+  | yq 'del(.metadata.creationTimestamp,.metadata.generation,.metadata.resourceVersion,.metadata.uid,.status)' \
+  > ceph-cluster-export.yaml
 
-2. **Update the CephCluster** to stretch mode (5 MON):
-
-```yaml
-# CephCluster patch (spec fragment)
-mon:
-  count: 5
-  allowMultiplePerNode: false
-  stretchCluster:
-    failureDomainLabel: topology.kubernetes.io/zone
-    subFailureDomain: host
-    zones:
-      - name: arbiter
-        arbiter: true
-      - name: site-a
-      - name: site-b
-```
-
-> Keep `placement.osd` restricted to `site-a`/`site-b` so that no OSDs are created on the arbiter.
-
-3. **(Optional, recommended)** Switch the `CephBlockPool` back to a **`zone`** *failure domain* with `size: 4` (2 per zone). If you prefer to pin the rule, create a dedicated CRUSH rule and assign it to the pool.
-
-```bash
-# Example: per-zone rule
-kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- bash -lc '
-set -e
-# Create the "rbd-4x-zone" rule (chooses leaves of type zone)
-ceph osd crush rule create-replicated rbd-4x-zone default zone || true
-# Assign the rule to the pool and adjust the size
-ceph osd pool set rbd-2x2-sites crush_rule rbd-4x-zone
-ceph osd pool set rbd-2x2-sites size 4
-ceph osd pool get rbd-2x2-sites crush_rule
-'
-```
-
-> After the switch to `zone`, Ceph relocates PGs to satisfy **2+2** between `site-a` and `site-b`. Do it in a maintenance window if there is already a lot of data.
-
----
-
-## 11) Quick troubleshooting
-
-* **PGs `active+undersized` with pool size=4**: happens when the CRUSH rule selects `zone` and there are only 2 zones (no stretch). Fix: use `failureDomain: host` or assign a `host` rule (section 6) until stretch is enabled.
-* **Ingress 503** when opening the dashboard: the `rook-ceph-mgr-dashboard` Service uses **port 7000** (HTTP). Set the Ingress to `backend-protocol: "HTTP"` and port `7000`.
-* **TLS certificate not issued**: check the ClusterIssuer, the public DNS record pointing at the Ingress, and that the HTTP-01 solver uses `class: nginx`. Avoid redirects that interfere with `/.well-known/acme-challenge/`.
-
----
-
-## 12) Appendix – Useful commands
-
-Overall status:
-
-```bash
-kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- ceph -s
-kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- ceph osd tree
-kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- ceph df
-```
-
-Pools and rules:
-
-```bash
-kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- ceph osd pool ls detail
-kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- ceph osd pool get rbd-2x2-sites crush_rule
-kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- ceph osd crush rule dump rbd-4x-host
-```
-
-Dashboard:
-
-```bash
-kubectl -n rook-ceph get secret rook-ceph-dashboard-password -o jsonpath="{.data.password}" | base64 -d; echo
-kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- bash -lc 'echo -n "NuevaPass" | ceph dashboard ac-user-set-password admin -i -'
+# Pool and StorageClass
+kubectl -n rook-ceph get cephblockpool rbd-2x2-sites -o yaml > ceph-blockpool-export.yaml
+kubectl get sc ceph-rbd -o yaml > storageclass-rbd-export.yaml
 ```
 
 ---
 
-> **Summary**: you deploy Rook‑Ceph with a dedicated storage network, disks referenced by **by-id**, an RBD pool of **size 4** over **host** to avoid undersized PGs while there is no arbiter, the dashboard through **Ingress** (TLS at NGINX, backend HTTP:7000) and, once you add the **arbiter**, you move the cluster to **stretch** mode and the pool to **`failureDomain: zone`** with **2+2** per site.
+## 11) Brief troubleshooting
+
+* **A MON is not rescheduled** after deleting one: the operator needs the **quorum** to stay safe. Check `rook-ceph-mon-endpoints`, `deployment/rook-ceph-mon-*` and `op-mon` in the operator logs.
+* **OSDs detected as HDD** behind the HBA: you can force `deviceClass: ssd` per disk (as in the `CephCluster`) or, once deployed, adjust it with `ceph osd crush set-device-class ssd osd.N`.
+* **Dashboard shows "Orchestrator is not available"**:
+
+  ```bash
+  kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph orch set backend rook
+  kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph orch status
+  ```
+
+---
+
+### Wrap-up
+
+This leaves you with a Rook‑Ceph deployment aligned with the current reality: 2 data zones + arbiter, 3 MON (one per zone), 2 MGR (A/B), OSDs only on A/B, and an RBD pool with **2+2** replicas per zone. Ready for production and for future growth!