{
  "schema_version": "1.0",
  "baseline": "A-feature-flags-batch-4-readiness",
  "phase": "2b+ → 2c readiness",
  "updated_at": "2026-04-19",
  "owner": "session_a",
  "purpose_th": "อัพเกรด cache_invalidation_contract.json ให้ใกล้ implementation · ระบุ runtime wiring ที่ต้องใส่ใน app.py · subscriber behaviour · stampede protection · failure modes · observability",
  "purpose_en": "Runtime-level readiness notes for FF Redis cache. Upgrades cache_invalidation_contract.json by prescribing the concrete wiring points in app.py: client init, subscriber start-up, single-flight locks, failure modes, and observability. Phase 2b+ ships NOTHING from this file.",
  "honest_note": "No Redis client is wired in 2b+ runtime. docker/ai/redis/init-cluster.sh exists but no app code connects to it. This document is the target Phase 2c wiring pass must implement, not a claim of current state.",
  "upgrade_relation": {
    "supersedes_for_runtime": "cache_invalidation_contract.json (kept as contract-only spec)",
    "adds": [
      "Concrete client init stanza",
      "Subscriber loop semantics",
      "Single-flight lock protocol for JWKS + flag refresh",
      "Graceful degradation behaviour if Redis unreachable",
      "Metric names for observability"
    ]
  },
  "client_init": {
    "library": "redis-py >= 5.0 (async via asyncio)",
    "env_var": "FF_REDIS_URL",
    "connection_options": {
      "decode_responses": true,
      "socket_timeout_ms": 200,
      "socket_connect_timeout_ms": 500,
      "retry_on_timeout": false,
      "health_check_interval_seconds": 30
    },
    "lifecycle": "init in app.on_startup · close in app.on_shutdown",
    "fallback_behaviour": "if FF_REDIS_URL unset OR connection fails on startup → log warning once · operate uncached · do NOT crash",
    "current_state": "not_wired"
  },
  "key_patterns_enforced": {
    "flag_registry": "ptt:ff:flag:<flag_id>",
    "tenant_override": "ptt:ff:override:tenant:<tenant_id>:<flag_id>",
    "user_override": "ptt:ff:override:user:<user_id>:<flag_id>",
    "evaluated": "ptt:ff:eval:<user_id>:<tenant_id>:<flag_id>",
    "jwks": "ptt:ff:jwks:current",
    "approval_ttl": "ptt:ff:approval:<approval_id> (cross-service cached copy of admin view)",
    "invariant": "all FF keys prefix with ptt:ff: · never write outside prefix · never read from Admin's ptt:admin: prefix"
  },
  "ttl_values_seconds": {
    "flag_registry": 300,
    "tenant_override": 60,
    "user_override": 60,
    "evaluated": 30,
    "jwks": 3600,
    "approval_ttl_mirror": 30,
    "honest_note": "Shorter TTL on evaluated keys is intentional — recovers from missed invalidation events quickly. Can be tuned once invalidation publisher is proven reliable."
  },
  "invalidation_triggers_runtime": [
    {
      "trigger": "PUT /v1/flags/override/tenant/{tenant_id}/{flag_id}",
      "invalidate_keys": [
        "ptt:ff:override:tenant:<tenant_id>:<flag_id>",
        "ptt:ff:eval:*:<tenant_id>:<flag_id> (scan + del)"
      ],
      "publish_channel": "ptt.ff.invalidate",
      "message_shape": { "kind": "tenant_override", "tenant_id": "T-pty-pilot-01", "flag_id": "ff.x", "ts": "<iso>" }
    },
    {
      "trigger": "PUT /v1/flags/override/user/{user_id}/{flag_id}",
      "invalidate_keys": [
        "ptt:ff:override:user:<user_id>:<flag_id>",
        "ptt:ff:eval:<user_id>:*:<flag_id> (scan + del · user-scoped)"
      ],
      "publish_channel": "ptt.ff.invalidate",
      "message_shape": { "kind": "user_override", "user_id": "U1", "flag_id": "ff.x", "ts": "<iso>" }
    },
    {
      "trigger": "Flag registry reload (admin action or file change detection)",
      "invalidate_keys": [
        "ptt:ff:flag:<flag_id> (one key)",
        "ptt:ff:eval:*:*:<flag_id> (scan + del · broad)"
      ],
      "publish_channel": "ptt.ff.invalidate",
      "message_shape": { "kind": "flag_registry", "flag_id": "ff.x", "ts": "<iso>" }
    },
    {
      "trigger": "Global invalidate (operator command)",
      "invalidate_keys": ["ptt:ff:* (FLUSHDB-like · scoped delete)"],
      "publish_channel": "ptt.ff.invalidate",
      "message_shape": { "kind": "global", "reason": "operator_requested", "ts": "<iso>" },
      "guardrails": "Requires admin role · audit.boundary event emitted · rate limited to 1/minute/operator"
    }
  ],
  "subscriber_loop_semantics": {
    "pattern": "single long-lived asyncio task started on app startup",
    "channel": "ptt.ff.invalidate",
    "on_message_action": "parse kind · delete corresponding keys in local process + coordinated delete across cluster via Redis DEL",
    "on_connection_loss": "exponential backoff up to 30s · log each reconnect · drop stale local cache on successful reconnect",
    "idempotency": "each message must be safe to process twice · DEL is idempotent · log duplicate detection via message_id + LRU seen-set",
    "ordering": "best-effort per publisher · FF does NOT rely on strict ordering",
    "current_state": "not_wired"
  },
  "stampede_protection": {
    "strategy": "single-flight lock on cache miss using SET ... NX EX",
    "lock_key_pattern": "ptt:ff:lock:<cache-key>",
    "lock_ttl_seconds": 5,
    "waiter_behaviour": "sleep 50ms · retry read · give up after 3 retries → fall through to authoritative source",
    "applies_to": ["flag_registry fetch", "JWKS fetch"],
    "does_not_apply_to": ["per-user eval keys (low cardinality per user)", "per-tenant overrides (typically cold lookup)"],
    "current_state": "not_wired"
  },
  "eventual_consistency_caveats": [
    "Invalidation publish is fire-and-forget · a slow subscriber may serve stale for up to subscriber_loop_semantics.on_connection_loss backoff duration",
    "Two concurrent writes to the same override key race at the DB level · last-writer-wins is acceptable · readers see one of the two",
    "During dual_write_parity window · readers see file-backed answer · writers update both · divergence detected by parity harness",
    "Cache is an optimisation · authoritative answer always comes from DB (or file in 2b+)",
    "No read-your-writes guarantee across replicas · documented limitation"
  ],
  "observability_metrics": [
    { "metric": "ff_cache_hit_total", "type": "counter", "labels": ["namespace"] },
    { "metric": "ff_cache_miss_total", "type": "counter", "labels": ["namespace"] },
    { "metric": "ff_cache_invalidate_total", "type": "counter", "labels": ["kind"] },
    { "metric": "ff_invalidation_latency_seconds", "type": "histogram" },
    { "metric": "ff_redis_reconnect_total", "type": "counter" },
    { "metric": "ff_cache_stampede_retry_total", "type": "counter", "labels": ["cache_key_pattern"] },
    { "current_state": "none_wired" }
  ],
  "what_is_simulated_vs_wired": {
    "simulated_in_2b_plus": [
      "Key pattern naming · documented only",
      "TTL map · documented only",
      "Channel naming · documented only",
      "Invalidation message shape · documented only"
    ],
    "wired_in_2b_plus": [],
    "wired_in_2c_target": [
      "Client init · subscriber loop · single-flight lock · metric emission",
      "Graceful degradation behaviour",
      "Cross-service approval_ttl mirror (FF caches Admin view)"
    ]
  },
  "failure_matrix": [
    { "condition": "Redis unreachable on startup", "behaviour": "operate uncached · warn once · do not retry tight loop", "user_impact": "slower response · no correctness impact" },
    { "condition": "Redis goes down mid-service", "behaviour": "per-request degradation · timeout 200ms · fall to authoritative source · retry on next scheduled health_check", "user_impact": "slower · no correctness impact" },
    { "condition": "Subscriber disconnected", "behaviour": "local cache may serve stale up to TTL · rely on TTL expiry for eventual consistency", "user_impact": "bounded staleness" },
    { "condition": "Cache corruption (bad JSON in value)", "behaviour": "parse error → DEL key · log · serve from authoritative source", "user_impact": "self-healing" },
    { "condition": "Single-flight lock lost (TTL expired mid-operation)", "behaviour": "second request proceeds uncoordinated · acceptable · one duplicate fetch at worst", "user_impact": "none" }
  ],
  "pair_with": {
    "cache_invalidation_contract": "docs/runtime/feature-flags-service/cache_invalidation_contract.json",
    "redis_channel_examples": "docs/runtime/feature-flags-service/redis_channel_examples.json",
    "jwks_notes": "docs/runtime/feature-flags-service/jwks_integration_notes.json",
    "persistence_schema": "docs/runtime/feature-flags-service/persistence_schema.json",
    "cutover_readiness_checklist": "docs/runtime/feature-flags-service/cutover_readiness_checklist.json",
    "docker_redis_infra": "docker/ai/redis/init-cluster.sh"
  }
}
