diff --git a/.github/actions/helm-deploy/action.yml b/.github/actions/helm-deploy/action.yml
index 011f8d9..2c52b56 100644
--- a/.github/actions/helm-deploy/action.yml
+++ b/.github/actions/helm-deploy/action.yml
@@ -24,6 +24,10 @@ inputs:
     description: 'Kubernetes namespace'
     required: true
     default: 'backend'
+  helm_set_files:
+    description: 'Comma-separated --set-file args, e.g. "configMapData.download=scripts/download.sh"'
+    required: false
+    default: ''
   github_token:
     description: 'GitHub token for Helm installation'
     required: true
@@ -92,11 +96,29 @@ runs:
          ${{ steps.parse_env.outputs.helm_env_values }}
          EOF
 
+          # Build optional --set-file flags from the comma-separated
+          # helm_set_files input (e.g. "configMapData.download=scripts/download.sh").
+          SET_FILE_FLAGS=""
+          if [ -n "${{ inputs.helm_set_files }}" ]; then
+            IFS=',' read -ra PAIRS <<< "${{ inputs.helm_set_files }}"
+            for pair in "${PAIRS[@]}"; do
+              # Trim leading/trailing whitespace around each pair.
+              pair="${pair#"${pair%%[![:space:]]*}"}"
+              pair="${pair%"${pair##*[![:space:]]}"}"
+              # if/fi (not `[ ... ] && ...`) so an empty trailing pair cannot
+              # leave the loop with a non-zero status under `bash -e`.
+              if [ -n "$pair" ]; then
+                SET_FILE_FLAGS="$SET_FILE_FLAGS --set-file $pair"
+              fi
+            done
+          fi
+
           # Deploy using Helm (values file won't be logged due to file redirection)
           helm upgrade --install slm-server ./deploy/helm \
             --namespace ${{ inputs.namespace }} \
             --create-namespace \
             --values "$temp_values" \
+            $SET_FILE_FLAGS \
             --wait \
             --timeout 10m
 
diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
index 284640c..8f6079c 100644
--- a/.github/workflows/cd.yml
+++ b/.github/workflows/cd.yml
@@ -83,5 +83,6 @@ jobs:
           helm_values_env: ${{ secrets.HELM_VALUES_ENV }}
           helm_values_persistence_hostpath: ${{ secrets.HELM_VALUES_PERSISTENCE_HOSTPATH }}
           helm_values_persistence_nodename: ${{ secrets.HELM_VALUES_PERSISTENCE_NODENAME }}
+          helm_set_files: 'configMapData.download=scripts/download.sh'
           namespace: ${{ env.NAMESPACE }}
           github_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 14a53e2..d3efc14 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -29,5 +29,6 @@ jobs:
           helm_values_env: ${{ secrets.HELM_VALUES_ENV }}
           helm_values_persistence_hostpath: ${{ secrets.HELM_VALUES_PERSISTENCE_HOSTPATH }}
           helm_values_persistence_nodename: ${{ secrets.HELM_VALUES_PERSISTENCE_NODENAME }}
+          helm_set_files: 'configMapData.download=scripts/download.sh'
           namespace: ${{ env.NAMESPACE }}
           github_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/deploy/helm/templates/configmap.yaml b/deploy/helm/templates/configmap.yaml
index 4fb1253..57f9832 100644
--- a/deploy/helm/templates/configmap.yaml
+++ b/deploy/helm/templates/configmap.yaml
@@ -1,3 +1,4 @@
+{{- if .Values.configMapData }}
 apiVersion: v1
 kind: ConfigMap
 metadata:
@@ -5,5 +6,8 @@ metadata:
   labels:
     {{- include "slm-server.labels" . | nindent 4 }}
 data:
-  download.sh: |-
-    {{- .Files.Get "scripts/download.sh" | nindent 4 }}
+  {{- range $key, $val := .Values.configMapData }}
+  {{ $key }}: |
+    {{- $val | nindent 4 }}
+  {{- end }}
+{{- end }}
diff --git a/deploy/helm/templates/deployment.yaml b/deploy/helm/templates/deployment.yaml
index bd0b995..ba028b4 100644
--- a/deploy/helm/templates/deployment.yaml
+++ b/deploy/helm/templates/deployment.yaml
@@ -19,15 +19,10 @@ spec:
         affinity:
           {{- toYaml . | nindent 8 }}
       {{- end }}
+      {{- if .Values.initContainers }}
       initContainers:
-        - name: download-model
-          image: busybox:1.28
-          command: ["sh", "-c", "/scripts/download.sh"]
-          volumeMounts:
-            - name: models-storage
-              mountPath: {{ .Values.persistence.mountPath }}
-            - name: scripts
-              mountPath: /scripts
+        {{- tpl (toYaml .Values.initContainers) . | nindent 8 }}
+      {{- end }}
       containers:
         - name: {{ .Chart.Name }}
           image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
@@ -67,20 +62,13 @@ spec:
         {{- end }}
           resources:
             {{- toYaml .Values.resources | nindent 12 }}
+          {{- if .Values.volumeMounts }}
           volumeMounts:
-            - name: models-storage
-              mountPath: {{ .Values.persistence.mountPath }}
-      volumes:
-        - name: models-storage
-          {{- if .Values.persistence.enabled }}
-          persistentVolumeClaim:
-            claimName: {{ include "slm-server.fullname" . }}
-          {{- else }}
-          emptyDir: {}
+            {{- tpl (toYaml .Values.volumeMounts) . | nindent 12 }}
           {{- end }}
-        - name: scripts
-          configMap:
-            name: {{ include "slm-server.fullname" . }}-scripts
-            defaultMode: 0755
+      {{- if .Values.volumes }}
+      volumes:
+        {{- tpl (toYaml .Values.volumes) . | nindent 8 }}
+      {{- end }}
   strategy:
     {{- toYaml .Values.strategy | nindent 4 }}
diff --git a/deploy/helm/values.yaml b/deploy/helm/values.yaml
index 16f83da..6778041 100644
--- a/deploy/helm/values.yaml
+++ b/deploy/helm/values.yaml
@@ -128,5 +128,35 @@ probes:
   successThreshold: 1
   failureThreshold: 5
 
+# Content is injected at deploy time via: --set-file configMapData.download=scripts/download.sh
+configMapData: {}
+
+# Volumes and volumeMounts rendered via tpl so template expressions work.
+volumes:
+  - name: models-storage
+    persistentVolumeClaim:
+      claimName: '{{ include "slm-server.fullname" . }}'
+  - name: scripts
+    configMap:
+      name: '{{ include "slm-server.fullname" . }}-scripts'
+      defaultMode: 0755
+      items:
+        - key: download
+          path: download.sh
+
+volumeMounts:
+  - name: models-storage
+    mountPath: /app/models
+
+initContainers:
+  - name: download-model
+    image: curlimages/curl:latest
+    command: ["sh", "-c", "MODEL_DIR=/app/models sh /scripts/download.sh"]
+    volumeMounts:
+      - name: models-storage
+        mountPath: /app/models
+      - name: scripts
+        mountPath: /scripts
+
 strategy:
   type: Recreate
diff --git a/scripts/download.sh b/scripts/download.sh
index d1d4a8f..79d945c 100755
--- a/scripts/download.sh
+++ b/scripts/download.sh
@@ -1,65 +1,50 @@
-#!/bin/bash
+#!/bin/sh
+#
+# Download model files for slm-server.
+#
+# When run inside the init container, MODEL_DIR is set by the caller
+# (the Helm-rendered configmap). For local use it defaults to
+# ../models relative to this script.
 
-set -ex
+set -e
 
-# Get the absolute path of the directory where the script is located
-SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)
-
-# Original (official Qwen repo, Q8_0 only):
-# https://huggingface.co/Qwen/Qwen3-0.6B-GGUF -> Qwen3-0.6B-Q8_0.gguf
-# Switched to second-state community repo for Q4_K_M quantization.
-# See README.md "Model Choice" section for rationale.
-REPO_URL="https://huggingface.co/second-state/Qwen3-0.6B-GGUF"
-# Set model directory relative to the script's location
-MODEL_DIR="$SCRIPT_DIR/../models"
-
-# Create the directory if it doesn't exist
+MODEL_DIR="${MODEL_DIR:-$(cd -- "$(dirname "$0")" && pwd)/../models}"
 mkdir -p "$MODEL_DIR"
 
-# --- Files to download ---
-FILES_TO_DOWNLOAD=(
-    "Qwen3-0.6B-Q4_K_M.gguf"
-    # Previous default: "Qwen3-0.6B-Q8_0.gguf" (805 MB, from Qwen/Qwen3-0.6B-GGUF)
-)
-
-echo "Downloading Qwen3-0.6B-GGUF model and params files..."
-
-for file in "${FILES_TO_DOWNLOAD[@]}"; do
-    if [ -f "$MODEL_DIR/$file" ]; then
-        echo "$file already exists, skipping download."
-    else
-        echo "Downloading $file..."
-        wget -P "$MODEL_DIR" "$REPO_URL/resolve/main/$file" || {
-            echo "Failed to download $file with wget, trying curl..."
-            curl -L -o "$MODEL_DIR/$file" "$REPO_URL/resolve/main/$file"
-        }
-    fi
-done
-
-# --- Embedding model: all-MiniLM-L6-v2 (ONNX, quantized UINT8 for AVX2) ---
-EMBEDDING_REPO_URL="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
-EMBEDDING_MODEL_DIR="$MODEL_DIR/all-MiniLM-L6-v2"
-
-mkdir -p "$EMBEDDING_MODEL_DIR/onnx"
-
-EMBEDDING_FILES=(
-    "onnx/model_quint8_avx2.onnx"
-    "tokenizer.json"
-)
-
-echo "Downloading all-MiniLM-L6-v2 ONNX embedding model..."
-
-for file in "${EMBEDDING_FILES[@]}"; do
-    dest="$EMBEDDING_MODEL_DIR/$file"
-    if [ -f "$dest" ]; then
-        echo "$file already exists, skipping download."
-    else
-        echo "Downloading $file..."
-        wget -O "$dest" "$EMBEDDING_REPO_URL/resolve/main/$file" || {
-            echo "Failed to download $file with wget, trying curl..."
-            curl -L -o "$dest" "$EMBEDDING_REPO_URL/resolve/main/$file"
-        }
-    fi
-done
-
-echo "Download process complete! Files are in $MODEL_DIR"
+# --- Chat LLM: Qwen3-0.6B (Q4_K_M quantisation from second-state) ---
+GGUF_REPO="https://huggingface.co/second-state/Qwen3-0.6B-GGUF"
+GGUF_FILE="Qwen3-0.6B-Q4_K_M.gguf"
+
+if [ -f "$MODEL_DIR/$GGUF_FILE" ]; then
+  echo "$GGUF_FILE already exists, skipping."
+else
+  echo "Downloading $GGUF_FILE ..."
+  # Download to a temp name, then rename: an interrupted transfer must not
+  # leave a partial file that the existence check above would later skip.
+  curl -fSL -o "$MODEL_DIR/$GGUF_FILE.part" "$GGUF_REPO/resolve/main/$GGUF_FILE"
+  mv "$MODEL_DIR/$GGUF_FILE.part" "$MODEL_DIR/$GGUF_FILE"
+fi
+
+# --- Embedding: all-MiniLM-L6-v2 (ONNX, quantized UINT8 for AVX2) ---
+EMBED_REPO="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
+EMBED_DIR="$MODEL_DIR/all-MiniLM-L6-v2"
+mkdir -p "$EMBED_DIR/onnx"
+
+if [ -f "$EMBED_DIR/tokenizer.json" ]; then
+  echo "tokenizer.json already exists, skipping."
+else
+  echo "Downloading tokenizer.json ..."
+  curl -fSL -o "$EMBED_DIR/tokenizer.json.part" "$EMBED_REPO/resolve/main/tokenizer.json"
+  mv "$EMBED_DIR/tokenizer.json.part" "$EMBED_DIR/tokenizer.json"
+fi
+
+ONNX_FILE="model_quint8_avx2.onnx"
+if [ -f "$EMBED_DIR/onnx/$ONNX_FILE" ]; then
+  echo "$ONNX_FILE already exists, skipping."
+else
+  echo "Downloading $ONNX_FILE ..."
+  curl -fSL -o "$EMBED_DIR/onnx/$ONNX_FILE.part" "$EMBED_REPO/resolve/main/onnx/$ONNX_FILE"
+  mv "$EMBED_DIR/onnx/$ONNX_FILE.part" "$EMBED_DIR/onnx/$ONNX_FILE"
+fi
+
+echo "Download complete. Files are in $MODEL_DIR"