diff --git a/Micro–DC/FR-blueprint-model-v00.toon b/Micro–DC/FR-blueprint-model-v00.toon new file mode 100644 index 0000000..45b438b --- /dev/null +++ b/Micro–DC/FR-blueprint-model-v00.toon @@ -0,0 +1,564 @@ +meta: + format: toon + version: "1.0" + kind: "deployment_blueprint" + name: "Sovereign Modular Micro-DC v1 — FR/GDPR, Eco-Efficient" + generated_by: "AI Council OS — 14-seat round table" + lastUpdated: "2026-01-01" + +context: + objective: > + Deploy a repeatable, sovereign, eco-efficient micro-data center “module” + in France (FR) that can be cloned to multiple French regions. All infra + must be reproducible from Git, fully automated (zero manual provisioning), + and aligned with GDPR/data-sovereignty and EU/French sustainability and + facility expectations. + primary_regime: + jurisdiction: "EU/EEA, Member State: France" + privacy: "GDPR + French Data Protection Act (Loi Informatique et Libertés)" + dpia_required_for: + - "Healthcare data" + - "Large-scale processing of special categories of personal data" + - "AI/ML profiling of individuals at scale" + facility_standards: + - "EN 50600-oriented design" + - "French electrical codes and safety regulations" + - "ICPE classification assessment for data centers (as applicable)" + sustainability_frameworks: + - "EU Code of Conduct for Data Centres (energy efficiency)" + - "Energy Efficiency Directive (EED) reporting where thresholds reached" + - "National French energy and climate objectives (low-carbon grid mix)" + target_use_cases: + - "AI/ML training and inference with GPUs for French public and private sector" + - "SaaS / line-of-business apps for French customers" + - "Edge compute for collectivités territoriales, health, and education" + design_principles: + - "Sovereign-by-design: hosted in France, managed by FR/EU entities" + - "Modular: small, repeatable 'bricks' instead of bespoke facilities" + - "Infra-as-code and policy-as-code; no snowflake clusters" + - "Observability, SLOs, error budgets from day 
one" + - "Sustainability KPIs (PUE/WUE/renewables/reuse) are first-class" + +assumptions: + module_scale: + it_load_kw: 80 # first FR module; can scale to 150 kW in v2 + racks_total: 8 + racks_gpu: 2 + racks_compute: 4 + racks_storage: 2 + location_examples: + - "Île-de-France (suburban site with district heating option)" + - "Occitanie / Nouvelle-Aquitaine (good free-cooling potential)" + stack_choice: + bare_metal: "MAAS (or equivalent) for server discovery/commissioning" + virtualization: "Proxmox VE on most nodes; bare-metal K8s for GPU nodes optional" + cloud_layer: "Kubernetes as primary control plane; OpenStack optional add-on" + storage: "Ceph (NVMe + HDD tiers) + object storage; local NVMe cache on GPU nodes" + automation_stack: + iac: + - "Terraform for network/DCIM/inventory where APIs exist" + - "Ansible for OS/provisioning/bootstrap" + gitops: + - "Argo CD or Flux for K8s/OpenStack configuration" + policy_as_code: + - "OPA/Kyverno, CI policy checks, security/compliance gates" + sovereign_controls: + residency: + - "All primary storage and processing located in France (FR-based micro-DCs)" + - "Backups replicated only within FR or EU/EEA, never outside without explicit legal basis" + data_classification_levels: + - "PUBLIC" + - "INTERNAL" + - "PERSONAL" + - "SENSITIVE_PERSONAL" + - "CRITICAL_SOVEREIGN_FR" + cross_border_rules: + - "CRITICAL_SOVEREIGN_FR: must not leave France" + - "SENSITIVE_PERSONAL: must not leave EU/EEA" + - "PERSONAL: only with approved GDPR transfer mechanism and DPO sign-off" + operators: + - "Primary operations teams domiciled in EU; French entities preferred" + - "No non-EU NOC with direct admin access; mediation via EU-based bastions" + +naming_conventions: + overview: > + Canonical naming scheme for sites and devices, used consistently in all + blueprints, IaC, monitoring, documentation and inventory systems. 
Pattern + is designed to be global (multi-continent), sovereign-aware (country), + location-specific (city) and module/rack/device specific. + + site_code: + pattern: "<CONTINENT>-<CITY>-<COUNTRY><NN>" + description: > + Human- and machine-readable identifier for a physical site/module. + Always use fixed-width 2-digit numeric suffix for uniqueness. + examples: + - "EU-PAR-FR01 # Paris, France - primary site" + - "EU-PAR-FR02 # Paris, France - secondary site" + - "EU-MAR-FR03 # Marseille, France - third site" + - "EU-FRA-DE01 # Frankfurt, Germany - first DE site" + - "US-NY-US01 # New York, USA - first US site" + components: + continent: + code_values: + - "EU # Europe" + - "US # United States" + notes: "Extend with other continents (AP, AF, etc.) as needed." + country: + code_values: + - "FR # France" + - "DE # Germany" + - "US # United States" + notes: "Use ISO-like 2-letter codes for countries." + city: + code_values: + - "PAR # Paris" + - "MAR # Marseille" + - "BOR # Bordeaux" + - "NAN # Nantes" + - "FRA # Frankfurt" + - "NY # New York" + notes: "City codes are stable mnemonics; define centrally and reuse." + index: + pattern: "NN # 01–99" + notes: "Unique per city+country; 01 usually primary, 02 secondary, etc." + + rack_code: + pattern: "<SITE_CODE>-RK<NN>" + description: > + Identifies a specific rack within a site. Can be extended with room/zone + information when necessary while preserving RK<NN> as the rack index. + examples: + - "EU-PAR-FR01-RK01" + - "EU-PAR-FR01-RK02" + - "EU-FRA-DE01-RK01" + - "US-NY-US01-RK01" + extensions: + room_or_zone: + description: > + If racks span multiple rooms/zones, use a suffix or infix such as + RK01A, RK02B or Z1-RK01 as standardised in the physical model. + examples: + - "EU-PAR-FR01-Z1-RK01" + - "EU-PAR-FR01-RK01A" + + device_code: + pattern: "<SITE_CODE>-RK<NN>-<DEVICE><NN>" + description: > + Identifies a specific device in a rack. DEVICE is a short type code; +
<NN> is a 2-digit index, except for devices that traditionally use + letter suffixes (e.g., PDUs A/B). + examples: + firewalls: + - "EU-PAR-FR01-RK01-FW01" + - "EU-PAR-FR01-RK01-FW02" + management_nodes: + - "EU-PAR-FR01-RK01-mgmt01 # Local management (e.g. MAAS rack controller)" + - "EU-PAR-FR01-RK01-mgmt02" + switches: + - "EU-PAR-FR01-RK01-tor01 # ToR / L3 switch" + - "EU-PAR-FR01-RK02-tor02" + - "EU-PAR-FR01-RK01-lf01 # Leaf" + - "EU-PAR-FR01-RK02-sp02 # Spine" + servers: + - "EU-PAR-FR01-RK01-srv01" + - "EU-PAR-FR01-RK01-srv02" + - "EU-FRA-DE01-RK01-srv01" + - "US-NY-US01-RK01-srv01" + storage: + - "EU-PAR-FR01-RK01-san01 # SAN array" + - "EU-PAR-FR01-RK01-nas01 # NAS filer" + - "EU-PAR-FR01-RK01-jbd01 # JBOD / disk shelf" + monitoring: + - "EU-PAR-FR01-RK01-mon01" + - "EU-PAR-FR01-RK01-mon02" + power: + - "EU-PAR-FR01-RK01-pduA" + - "EU-PAR-FR01-RK01-pduB" + + device_type_codes: + tor: "Top of Rack switch (often L3 capable)" + ss: "Super spine" + sp: "Spine" + blf: "Border leaf" + lf: "Leaf" + fw: "Firewall" + lb: "Load balancer" + srv: "Server (compute/GPU/infra)" + san: "SAN storage array" + nas: "NAS filer" + jbd: "JBOD / disk shelf" + oob: "Out-of-band management device" + mgmt: "Generic management node (e.g., MAAS, jump host)" + mon: "Monitoring / logging node" + pduA: "Rack PDU side A" + pduB: "Rack PDU side B" + + implementation_notes: + - "All naming must be enforced in IaC modules (Terraform/Ansible variables, K8s labels, etc.)." + - "Monitoring, CMDB and inventory tools must use these names as primary identifiers." + - "Avoid ad-hoc names; new device types must extend the device_type_codes map and be reviewed." + - "Where external systems impose constraints (e.g. 15-character limits), define deterministic truncation rules."
+ +architecture: + layers: + - name: "Facility & Physical — FR Module (Physical Infrastructure & Facility Engineering Lead)" + description: > + Design of the physical micro-DC module in France: room/container, + racks, power, cooling, structured cabling, environmental monitoring, + aligned with French building/electrical codes and EN 50600-style + principles, implementing sustainability objectives from the Sovereign + Compliance & Sustainability Lead. + design: + form_factor: + options: + - "Prefabricated container (2-4 racks) for remote/edge French sites" + - "Dedicated technical room in existing building for 6-10 racks" + physical_security: + access_control: "Badged access with logs; dual-person for critical work" + surveillance: "CCTV with retention per French law; privacy-by-design zones" + fire_protection: "Gas or water-mist systems suitable for IT rooms" + power: + utility_feeds: "At least 1 primary + 1 secondary where DSO allows" + ups_topology: "Modular online UPS, N+1" + generator: + presence: true + autonomy_hours: 8 + redundancy_level: "N+1 for IT load, 2N for critical infra when justified" + per_rack_pdu: + type: "Intelligent, metered, switched" + metering_resolution: "Per-outlet where possible" + grid_integration: + demand_response_ready: true + notes: "Enable participation in French flexibility services where economically viable" + cooling: + primary: + type: "In-row or rear-door cooling units" + chilled_water: "Preferred for higher-density FR sites" + free_cooling: + enabled: true + mode: "Dry coolers / adiabatic where climate permits" + gpu_rack_density_kw: 20 + cpu_rack_density_kw: 8 + set_points: + cold_aisle_celsius: [26, 28] + humidity_relative_percent: [20, 80] + sustainability_integration: + renewable_sources: + - "Grid-supplied low-carbon French electricity mix" + - "Optional rooftop PV for partial coverage" + waste_heat_reuse: + enabled: true + integration_targets: + - "Local district heating network" + - "Nearby public buildings (pools, 
gyms, campuses)" + monitoring: + sensors: + - "Rack inlet temperature" + - "Rack exhaust temperature" + - "Room temperature and humidity" + - "PDU-level power and voltage" + - "Cooling system status and energy use" + telemetry_export: + protocol: "SNMP/Modbus translated to Prometheus metrics" + endpoint: "Local telemetry gateway in INFRA_MGMT VRF" + documentation_as_code: + artefacts: + - "fr/site_manifest_fr-<site_code>.yaml" + - "fr/rack_layout_fr-<site_code>.yaml" + - "fr/power_chain_fr-<site_code>.yaml" + - "fr/cooling_spec_fr-<site_code>.yaml" + + - name: "Network & Connectivity — FR (Network Architect)" + design: + topology: + underlay: "Leaf-spine, 2x spine, dual ToR per rack where cost-effective" + uplinks_per_rack: 2 + routing: "L3 to the top, BGP between ToR and spines" + addressing_and_dns: + ipam_source: "Central IPAM (Git-backed) with FR site overlays" + domains: + - "mgmt.<site_code>.fr" + - "svc.<site_code>.fr" + segmentation: + vrfs: + - name: "INFRA_MGMT" + purpose: "OOB and infra management for FR module" + - name: "TENANT" + purpose: "Production/workload traffic" + - name: "STORAGE" + purpose: "Ceph and backup traffic" + - name: "OUT_OF_BAND" + purpose: "Serial consoles, IPMI/iDRAC" + vlans: + - { id: 10, name: "mgmt_nodes" } + - { id: 20, name: "storage_cluster" } + - { id: 30, name: "k8s_nodes" } + - { id: 40, name: "gpu_nodes" } + - { id: 100, name: "dmz" } + wan: + connectivity: + - "Dual ISPs whenever feasible (e.g., Orange + alternative operator)" + - "Optional private L2/L3 link to regional hub in France" + sovereignty: + - "All VPN termination in FR; keys managed by FR/EU entities" + - "No direct internet access to infra mgmt; jump via FR-based bastions" + infra_as_code: + tools: + - "Terraform for switch/router config where supported" + - "Ansible for config templating, deployment, and idempotent updates" + testing: + - "Static analysis of configs before apply" + - "Automated reachability and segmentation tests (ping, traceroute, ACL checks)" + + - name: "Compute, Storage & Virtualization — FR Module
(Virtualization Architect, Capacity & Performance Engineer)" + design: + node_types: + - name: "fr-compute-standard" + cpu: "2 x 32-core (FR-approved vendor SKUs)" + ram_gb: 512 + storage_local: + system: "2 x SSD in RAID1" + data_nvme: "Optional 2 x NVMe" + - name: "fr-compute-gpu" + cpu: "2 x 32-core, NUMA-aligned" + gpus: 4 + ram_gb: 768 + storage_local: + system: "2 x SSD RAID1" + scratch_nvme: "2-4 x NVMe for local scratch" + - name: "fr-storage-ceph" + cpu: "1 x 24-core" + ram_gb: 256 + storage: + osd_nvme: 2 + osd_hdd: 10 + hypervisor: + platform: "Proxmox VE" + features: + - "Cluster with quorum across at least 3 nodes" + - "Ceph integration for shared storage" + - "GPU passthrough and SR-IOV per FR GPU node profiles" + storage: + ceph: + pools: + - name: "fr-k8s-block" + type: "replicated" + - name: "fr-gpu-block" + type: "replicated" + - name: "fr-object-archive" + type: "erasure-coded" + locality: + constraint: "Within FR modules; optional replication between FR sites only" + performance_principles: + - "NUMA and PCIe alignment validated for all GPU nodes" + - "Baseline throughput and latency tests stored in Git (fr/perf_baselines/)" + - "Capacity plans per region (Île-de-France, etc.) 
tied to real telemetry" + + - name: "Platform & Workloads — FR (Principal SRE, Automation & IaC Lead, OpenStack Architect)" + design: + provisioning_flow: + - "MAAS discovers and commissions FR bare-metal nodes using fr/hardware_profiles/" + - "Ansible installs Proxmox or K8s node OS from fr/ansible/roles/" + - "GitOps (Argo/Flux) applies cluster and app layer from fr/platform-clusters/" + clusters: + k8s: + role: "Primary app platform for FR workloads" + ha_control_plane: 3 + worker_pools: + - name: "fr-general" + taints: [] + - name: "fr-gpu" + taints: + - "gpu=true:NoSchedule" + openstack_optional: + enabled: false + note: "Can be enabled for VM-heavy FR workloads later as microdc-fr-v2" + multi_tenancy: + k8s: + namespaces: + - "fr-public" + - "fr-internal" + - "fr-personal" + - "fr-sensitive" + - "fr-critical-sovereign" + policies: + - "ResourceQuotas per namespace aligned with capacity models" + - "NetworkPolicies enforcing zero-trust and data-class separation" + + - name: "Compliance, Sovereignty & Sustainability — FR (Sovereign Compliance & Sustainability Lead, Physical Infrastructure Lead, Security Architect)" + design: + data_residency: + rules: + - "CRITICAL_SOVEREIGN_FR namespaces must use storage classes bound to FR-local Ceph pools only" + - "Backups of CRITICAL_SOVEREIGN_FR stay within FR; SENSITIVE_PERSONAL only in EU" + - "All backup and DR targets defined explicitly in fr/policies/data_residency.yaml" + admin_access: + identity_provider: "FR/EU-based IdP (e.g., French-based IdP or EU cloud with FR region)" + controls: + - "MFA required for all admin access" + - "Just-in-time elevation with approval and full logging" + - "No direct non-EU operator accounts on infra" + sustainability_kpis: + targets: + pue_max: 1.4 + renewable_share_min_percent: 80 + energy_reuse_target: "Heat reuse in regions with DH networks" + wue_goal: "Minimise water use; prefer dry coolers in water-stressed regions" + measurement: + - "Facility-level meters integrated into 
telemetry" + - "Monthly and annual sustainability dashboards" + policy_as_code: + - "OPA/Kyverno policies in fr/policies/ enforce namespace/storage alignment to data classes" + - "CI checks reject manifests violating FR residency and sustainability constraints" + +git_structure_and_pipelines: + repos: + - name: "infra-foundation-fr" + contents: + - "fr/facility/site_manifests/" + - "fr/facility/rack_layouts/" + - "fr/facility/power_and_cooling/" + - "network/fr/terraform/" + - "proxmox/fr/ansible/" + - "maas/fr/profiles/" + - name: "platform-clusters-fr" + contents: + - "k8s/clusters/microdc-fr-v1/" + - "addons/monitoring-logging-security/" + - "workloads/reference-apps/" + - name: "policies-and-compliance-fr" + contents: + - "data-classification-fr.yaml" + - "opa-policies-fr/" + - "sustainability-kpis-fr.yaml" + - "rbac-and-iam-fr.yaml" + ci_cd: + pipeline_stages: + - name: "lint_and_unit" + checks: + - "YAML validation, Terraform fmt/validate, Ansible syntax check" + - name: "policy_gates" + checks: + - "OPA/Conftest for GDPR/FR residency rules" + - "Security baselines (no plaintext secrets, required labels/annotations)" + - name: "integration_test" + checks: + - "Ephemeral FR lab deployment (virtual or small test rack)" + - "Network and storage conformance tests" + - name: "promotion_to_template" + checks: + - "Sign-off from SRE, Security, Compliance, Facility leads" + - name: "site_rollout_fr" + strategy: + - "ArgoCD/Flux sync to microdc-fr-v1 clusters with progressive rollout" + - "Blue/green or canary strategies for platform upgrades" + +deployment_runbook: + phases: + - phase: 0 + name: "Policy & FR Site Definition" + owners: + - "Sovereign Compliance & Sustainability Lead" + - "Physical Infrastructure & Facility Engineering Lead" + steps: + - "Define FR-specific data classification (CRITICAL_SOVEREIGN_FR etc.)." + - "Set sustainability targets in line with FR/EU goals." + - "Create fr/site_manifest_fr-<site_code>.yaml and have DPO/Legal validate."
+ - "Assess ICPE classification and necessary permits for the FR site." + - phase: 1 + name: "Facility Build-Out — FR" + owners: + - "Physical Infrastructure & Facility Engineering Lead" + steps: + - "Prepare room/container per fr/site_manifest_fr-<site_code>.yaml." + - "Install racks, UPS, PDUs, cooling per fr/power_chain_fr-<site_code>.yaml." + - "Connect BMS/DCIM to telemetry gateway; verify metrics export." + - "Run safety checks and initial PUE estimation." + - phase: 2 + name: "Network & Out-of-Band Bring-Up — FR" + owners: + - "Network Architect" + - "Security Architect" + steps: + - "Deploy ToR and core configs from network/fr/terraform." + - "Establish OOB network and secure admin paths (French bastions)." + - "Run automated reachability tests and segmentation verification." + - phase: 3 + name: "Bare-Metal & Hypervisor Provisioning — FR" + owners: + - "Bare-Metal Provisioning Lead" + - "Virtualization Architect" + steps: + - "Commission nodes in MAAS with fr-specific hardware profiles." + - "Install Proxmox/K8s base OS via fr/ansible roles." + - "Validate firmware, RAID, NIC bonding, and GPU detection." + - phase: 4 + name: "Platform Bootstrap — FR" + owners: + - "Principal SRE" + - "Automation & IaC Lead" + steps: + - "Deploy GitOps operator, point at platform-clusters-fr repo." + - "Sync base K8s cluster (and optional OpenStack) for microdc-fr-v1." + - "Deploy core platform services: CNI, CSI, ingress, observability, security agents." + - phase: 5 + name: "Compliance & Telemetry Validation — FR" + owners: + - "Sovereign Compliance & Sustainability Lead" + - "Observability & Telemetry Architect" + steps: + - "Verify all required facility metrics (PUE components, temps, power) are collected." + - "Run synthetic residency tests to ensure data stays in FR/EU as required." + - "Generate initial GDPR/DPIA evidence pack and sustainability report."
+ - phase: 6 + name: "Workload Onboarding — FR" + owners: + - "Platform Lifecycle & Operations Lead" + - "Capacity & Performance Engineer" + steps: + - "Define workload templates mapped to FR data classes." + - "Place workloads into appropriate namespaces/tenants." + - "Run perf baselines and tune quotas/limits." + - "Define SLOs/SLIs and error budgets per critical FR service." + - phase: 7 + name: "Scale-Out & Multi-Site Federation — FR" + owners: + - "Principal SRE" + - "Network Architect" + steps: + - "Clone microdc-fr-v1 to additional FR regions with site-specific overrides." + - "Implement identity and policy federation across FR modules." + - "Regularly compare KPIs and refine the FR reference design." + +verification_and_validation: + automated_checks: + - "IaC unit/integration tests for FR modules" + - "Policy-as-code checks for GDPR/FR residency and security" + - "Post-deploy conformance tests for network, storage, and platform" + manual_reviews: + - "DPO/legal review for GDPR and French data protection alignment" + - "Facility audit for physical security, fire safety, and ICPE/regulatory compliance" + - "Quarterly sustainability review vs FR/EU targets (PUE, renewables, reuse)" + continuous_improvement: + - "Chaos drills in FR test windows to validate reliability" + - "Post-incident reviews feeding into blueprint updates" + - "Versioned evolution: microdc-fr-v1 → v2 with clear change logs" + +council_alignment: + outcome_requirements_satisfied: + - "zero_manual_provisioning" + - "zero_snowflake_clusters" + - "fully_reproducible_infra_from_git" + - "multi_dc_consistency" + - "ha_control_planes" + - "predictable_gpu_performance" + - "automated_lifecycle_management" + - "telemetry_and_self_healing" + - "clear_slo_sli_error_budgets" + - "security_and_compliance_built_in" + - "gdpr_and_data_sovereignty_alignment" + - "eco_efficiency_and_sustainability_kpis" + - "architecture_must_be_deployable" + - "all_answers_validated_by_cross_seat_consensus" +```