diff --git a/openapi.yaml b/openapi.yaml
index c2be354..0444fde 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -7034,16 +7034,13 @@ paths:
           content:
             application/json:
               schema:
-                type: array
-                items:
-                  type: object
-                  $ref: '#/components/schemas/RL.TrainingSession'
+                $ref: '#/components/schemas/RL.TrainingSessionsListResponse'
         default:
           description: An unexpected error response.
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorResponse'
+                $ref: '#/components/schemas/RpcStatus'
       parameters:
         - name: status
           in: query
@@ -7057,12 +7054,16 @@ paths:
           schema:
             description: Maximum number of sessions to return (1-100), defaults to 20
             type: integer
+            format: int32
+            default: 20
         - name: offset
           in: query
           required: false
           schema:
             description: Number of sessions to skip
             type: integer
+            format: int32
+            default: 0
     post:
       summary: Create training session
       description: Creates a training session and returns its details.
@@ -7086,7 +7087,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorResponse'
+                $ref: '#/components/schemas/RpcStatus'
         
   /rl/training-sessions/{session_id}:
     get:
@@ -7106,7 +7107,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorResponse'
+                $ref: '#/components/schemas/RpcStatus'
       parameters:
         - name: session_id
           in: path
@@ -7132,7 +7133,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorResponse'
+                $ref: '#/components/schemas/RpcStatus'
       parameters:
         - name: session_id
           in: path
@@ -7164,7 +7165,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorResponse'
+                $ref: '#/components/schemas/RpcStatus'
       parameters:
         - name: session_id
           in: path
@@ -7196,7 +7197,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorResponse'
+                $ref: '#/components/schemas/RpcStatus'
       parameters:
         - name: session_id
           in: path
@@ -7210,7 +7211,7 @@ paths:
           schema:
             description: Operation ID
             type: string
-  /rl/training-sessions/{session_id}:forward-backward:
+  /rl/training-sessions/{session_id}/operations/forward-backward:
     post:
       summary: Forward-backward pass
       description: Submits a forward-backward pass operation that will asynchronously compute gradients via backpropagation.
@@ -7234,7 +7235,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorResponse'
+                $ref: '#/components/schemas/RpcStatus'
       parameters:
         - name: session_id
           in: path
@@ -7242,7 +7243,7 @@ paths:
           schema:
             description: Training session ID
             type: string
-  /rl/training-sessions/{session_id}:optim-step:
+  /rl/training-sessions/{session_id}/operations/optim-step:
     post:
       summary: Optimizer step
       description: Submits an optimizer step operation that will asynchronously apply accumulated gradients to update model parameters.
@@ -7266,7 +7267,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorResponse'
+                $ref: '#/components/schemas/RpcStatus'
       parameters:
         - name: session_id
           in: path
@@ -7274,7 +7275,7 @@ paths:
           schema:
             description: Training session ID
             type: string
-  /rl/training-sessions/{session_id}:sample:
+  /rl/training-sessions/{session_id}/operations/sample:
     post:
       summary: Sample
       description: Submits a sample operation that will asynchronously generate text completions with logprobs.
@@ -7298,7 +7299,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorResponse'
+                $ref: '#/components/schemas/RpcStatus'
       parameters:
         - name: session_id
           in: path
@@ -7306,7 +7307,7 @@ paths:
           schema:
             description: Training session ID
             type: string
-  /rl/training-sessions/{session_id}:stop:
+  /rl/training-sessions/{session_id}/stop:
     post:
       summary: Stop training session
       description: Stops a training session.
@@ -7324,7 +7325,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorResponse'
+                $ref: '#/components/schemas/RpcStatus'
       parameters:
         - name: session_id
           in: path
@@ -7348,7 +7349,9 @@ components:
         learning_rate:
           description: Learning rate for this step.
           type: number
+          format: float
+          example: 0.0001
           default: 0.0001
         adamw_params:
           $ref: '#/components/schemas/RL.AdamWOptimizerParams'
     RL.SampleBody:
@@ -7364,8 +7367,9 @@ components:
           description: Optional sampling parameters
         num_samples:
           type: integer
+          format: int64
           example: 1
           default: 1
           description: Number of completions to generate for this prompt
     RL.ForwardBackwardBody:
       type: object
@@ -7398,23 +7402,27 @@ components:
       properties:
         max_tokens:
           type: integer
+          format: int32
           example: 100
           default: 100
           description: Maximum number of tokens to generate per completion
         temperature:
           type: number
           example: 1
+          format: float
           default: 1.0
           description: Sampling temperature
         top_p:
           type: number
           example: 1
+          format: float
           default: 1.0
           description: Nucleus sampling probability threshold
         top_k:
           type: integer
+          format: int32
           example: -1
           default: -1
           description: Top-k sampling limit
         stop:
           type: array
@@ -7425,7 +7433,8 @@ components:
             type: string
           description: Generation stops when any of these strings is produced
         seed:
-          type: integer
+          type: string
+          format: int64
-          example: 42
+          example: "42"
           description: Random seed for reproducibility
     RL.InputChunk:
@@ -7433,6 +7442,24 @@ components:
       properties:
         encoded_text:
           $ref: '#/components/schemas/RL.EncodedText'
+    RL.ListMeta:
+      type: object
+      properties:
+        total:
+          type: string
+          format: int64
+          example: "42"
+          description: Total number of items matching the filter
+        limit:
+          type: integer
+          format: int32
+          example: 20
+          description: Maximum number of items returned per page
+        offset:
+          type: integer
+          format: int32
+          example: 0
+          description: Number of items skipped
     RL.EncodedText:
       type: object
       properties:
@@ -7444,7 +7471,8 @@ components:
             - 456
             - 789
           items:
-            type: integer
+            type: string
+            format: int64
     RL.DType:
       type: string
       enum:
@@ -7469,20 +7497,34 @@ components:
       properties:
         clip_low:
           type: number
+          format: float
           example: 0.2
+          default: 0.2
           description: Lower clip bound for importance ratio
         clip_high:
           type: number
+          format: float
           example: 0.28
+          default: 0.28
           description: Upper clip bound for importance ratio
         beta:
           type: number
+          format: float
           example: 0.1
+          default: 0.0
           description: KL penalty coefficient
         agg_type:
-          type: string
-          example: fixed_horizon
+          $ref: '#/components/schemas/RL.GRPOLossAggregationType'
+          example: GRPO_LOSS_AGGREGATION_TYPE_FIXED_HORIZON
+          default: "GRPO_LOSS_AGGREGATION_TYPE_FIXED_HORIZON"
           description: Aggregation type for loss computation
+    RL.GRPOLossAggregationType:  # string enum referenced by the `agg_type` property
+      type: string
+      enum:
+        - GRPO_LOSS_AGGREGATION_TYPE_UNSPECIFIED
+        - GRPO_LOSS_AGGREGATION_TYPE_FIXED_HORIZON
+        - GRPO_LOSS_AGGREGATION_TYPE_PER_TOKEN
+      default: GRPO_LOSS_AGGREGATION_TYPE_UNSPECIFIED  # `agg_type` declares its own default (FIXED_HORIZON)
     RL.LossConfig:
       type: object
       required:
@@ -7509,7 +7551,8 @@ components:
             - 456
             - 789
           items:
-            type: integer
+            type: string
+            format: int64
         dtype:
           description: Data type of the integer array
           $ref: '#/components/schemas/RL.DType'
@@ -7528,7 +7571,8 @@ components:
             - 0
             - 1
           items:
-            type: integer
+            type: string
+            format: int64
         dtype:
           description: Data type of the integer array (must be D_TYPE_INT64)
           $ref: '#/components/schemas/RL.DType'
@@ -7546,6 +7590,7 @@ components:
             - 0.5
           items:
             type: number
+            format: float
         dtype:
           description: Data type of the float array (D_TYPE_FLOAT32 or D_TYPE_BFLOAT16)
           $ref: '#/components/schemas/RL.DType'
@@ -7563,6 +7608,7 @@ components:
             - -0.8
           items:
             type: number
+            format: float
         dtype:
           description: Data type of the float array (D_TYPE_FLOAT32 or D_TYPE_BFLOAT16)
           $ref: '#/components/schemas/RL.DType'
@@ -7608,21 +7654,21 @@ components:
     RL.OptimStepOperation:
       type: object
       properties:
-        operation_id:
+        id:
           type: string
           example: 550e8400-e29b-41d4-a716-446655440000
           description: Operation ID
         status:
           $ref: '#/components/schemas/RL.TrainingOperationStatus'
-          description: Current operation status
-        data:
+          description: Operation status
+        output:
           $ref: '#/components/schemas/RL.OptimStepResult'
         error:
           $ref: '#/components/schemas/RL.TrainingOperationError'
     RL.SampleOperation:
       type: object
       properties:
-        operation_id:
+        id:
           type: string
           example: 550e8400-e29b-41d4-a716-446655440000
           description: Operation ID
@@ -7630,7 +7676,7 @@ components:
           $ref: '#/components/schemas/RL.TrainingOperationStatus'
           example: TRAINING_OPERATION_STATUS_PENDING
           description: Operation status
-        data:
+        output:
           $ref: '#/components/schemas/RL.SampleResult'
         error:
           $ref: '#/components/schemas/RL.TrainingOperationError'
@@ -7639,7 +7685,8 @@ components:
       properties:
         step:
           description: Step number
-          type: integer
+          type: string
+          format: uint64
-          example: 100
+          example: "100"
     RL.SampleResult:
       type: object
@@ -7660,7 +7707,8 @@ components:
             - 456
             - 789
           items:
-            type: integer
+            type: string
+            format: int64
           description: Generated token IDs
         logprobs:
           type: array
@@ -7669,7 +7717,8 @@ components:
             - -1.2
             - -0.3
           items:
-            type: integer
+            type: number
+            format: double
           description: Log probabilities for each generated token
         stop_reason:
           type: string
@@ -7678,14 +7727,15 @@ components:
     RL.ForwardBackwardOperation:
       type: object
       properties:
-        operation_id:
+        id:
           type: string
+          example: 550e8400-e29b-41d4-a716-446655440000
           description: Operation ID
         status:
           $ref: '#/components/schemas/RL.TrainingOperationStatus'
           example: TRAINING_OPERATION_STATUS_PENDING
           description: Operation status
-        data:
+        output:
           $ref: '#/components/schemas/RL.ForwardBackwardResult'
         error:
           $ref: '#/components/schemas/RL.TrainingOperationError'
@@ -7701,17 +7751,27 @@ components:
           description: Loss-specific metrics (e.g., KL divergence, clip fraction for GRPO)
           additionalProperties:
             type: number
+            format: double
     RL.TrainingOperationError:
       type: object
       properties:
         code:
           description: Application error code
-          type: string
-          example: OPERATION_TIMEOUT
+          $ref: '#/components/schemas/RL.TrainingOperationErrorCode'
+          example: TRAINING_OPERATION_ERROR_CODE_TIMEOUT
         message:
           description: Human-readable error message
           type: string
           example: Operation timed out
+    RL.TrainingOperationErrorCode:  # string enum referenced by RL.TrainingOperationError.code
+      type: string
+      enum:
+        - TRAINING_OPERATION_ERROR_CODE_UNSPECIFIED
+        - TRAINING_OPERATION_ERROR_CODE_RESOURCE_EXHAUSTED
+        - TRAINING_OPERATION_ERROR_CODE_TIMEOUT
+        - TRAINING_OPERATION_ERROR_CODE_INTERNAL_ERROR
+        - TRAINING_OPERATION_ERROR_CODE_SESSION_NOT_ACTIVE
+      default: TRAINING_OPERATION_ERROR_CODE_UNSPECIFIED
     RL.TrainingOperationStatus:
       type: string
       enum:
@@ -7747,6 +7807,7 @@ components:
         - TRAINING_SESSION_STATUS_STOPPED
         - TRAINING_SESSION_STATUS_STOPPING
         - TRAINING_SESSION_STATUS_ERROR
+        - TRAINING_SESSION_STATUS_EXPIRED
     RL.TrainingSession:
       type: object
       properties:
@@ -7764,7 +7825,9 @@ components:
           type: string
         step:
           description: Current training step
-          type: integer
+          type: string
+          format: uint64
+          default: "0"
         created_at:
           type: string
           format: date-time
@@ -7775,21 +7838,39 @@ components:
           description: Timestamp when the training session was last updated
         lora_config:
           $ref: '#/components/schemas/RL.LoraConfig'
+    RL.TrainingSessionsListResponse:  # list envelope: sessions in `data`, paging info in `meta`
+      type: object
+      properties:
+        data:
+          type: array
+          items:
+            type: object  # NOTE(review): redundant next to $ref (siblings are ignored in OpenAPI 3.0) - confirm
+            $ref: '#/components/schemas/RL.TrainingSession'
+          description: List of training sessions
+        meta:
+          $ref: '#/components/schemas/RL.ListMeta'
+          description: Pagination metadata
     RL.LoraConfig:
       type: object
       description: LoRA adapter configuration
       properties:
         rank:
           type: integer
+          format: int64
+          example: 8
           default: 8
           description: Rank of the LoRA adapter
         alpha:
           type: integer
+          format: int64
+          example: 16
           default: 16
           description: Alpha of the LoRA adapter
         dropout:
           type: number
+          format: float
+          example: 0.05
           default: 0.05
           description: Dropout of the LoRA adapter
     RL.AdamWOptimizerParams:
       description: AdamW optimizer parameters
@@ -7798,19 +7879,27 @@ components:
         beta1:
           description: First moment decay rate
           type: number
+          format: float
+          example: 0.9
           default: 0.9
         beta2:
           description: Second moment decay rate
           type: number
+          format: float
+          example: 0.95
           default: 0.95
         eps:
           description: Epsilon for numerical stability
           type: number
-          default: 1e-8
+          format: float
+          example: 1.0e-8
+          default: 1.0e-8
         weight_decay:
           description: Weight decay coefficient
           type: number
+          format: float
+          example: 0.1
           default: 0.1
     ErrorResponse:
       type: object
       properties:
@@ -7818,6 +7907,25 @@ components:
           type: integer
         message:
           type: string
+    ProtobufAny:  # element type of RpcStatus.details
+      type: object
+      properties:
+        '@type':
+          type: string
+      additionalProperties: {}  # empty schema: extra fields of any type are allowed
+    RpcStatus:  # body schema of the `default` (unexpected error) responses
+      type: object
+      properties:
+        code:
+          type: integer
+          format: int32
+        message:
+          type: string
+        details:
+          type: array
+          items:
+            type: object  # NOTE(review): redundant next to $ref (siblings are ignored in OpenAPI 3.0) - confirm
+            $ref: '#/components/schemas/ProtobufAny'
     GPUClusterControlPlaneNode:
       type: object
       required: