Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .github/actions/helm-deploy/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ inputs:
description: 'Kubernetes namespace'
required: true
default: 'backend'
helm_set_files:
description: 'Comma-separated --set-file args, e.g. "configMapData.download=scripts/download.sh"'
required: false
default: ''
github_token:
description: 'GitHub token for Helm installation'
required: true
Expand Down Expand Up @@ -92,11 +96,23 @@ runs:
${{ steps.parse_env.outputs.helm_env_values }}
EOF

# Build --set-file flags from the comma-separated helm_set_files input.
# The flags are collected in an array rather than a flat string so that each
# key=path pair reaches helm as exactly one argument, even if the path
# contains spaces or glob characters.
# NOTE(review): the input is substituted into the script text by the Actions
# expression syntax before bash runs; passing it through the step's `env:`
# would avoid interpreting its content as shell — confirm with the step
# definition, which is outside this hunk.
SET_FILE_FLAGS=()
HELM_SET_FILES="${{ inputs.helm_set_files }}"
if [ -n "$HELM_SET_FILES" ]; then
  IFS=',' read -ra PAIRS <<< "$HELM_SET_FILES"
  for pair in "${PAIRS[@]}"; do
    # Trim leading, then trailing, whitespace around the pair.
    pair="${pair#"${pair%%[![:space:]]*}"}"
    pair="${pair%"${pair##*[![:space:]]}"}"
    # Empty entries (e.g. from a trailing comma) are ignored.
    if [ -n "$pair" ]; then
      SET_FILE_FLAGS+=(--set-file "$pair")
    fi
  done
fi

# Deploy using Helm (values file won't be logged due to file redirection)
helm upgrade --install slm-server ./deploy/helm \
  --namespace "${{ inputs.namespace }}" \
  --create-namespace \
  --values "$temp_values" \
  "${SET_FILE_FLAGS[@]}" \
  --wait \
  --timeout 10m

Expand Down
1 change: 1 addition & 0 deletions .github/workflows/cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,5 +83,6 @@ jobs:
helm_values_env: ${{ secrets.HELM_VALUES_ENV }}
helm_values_persistence_hostpath: ${{ secrets.HELM_VALUES_PERSISTENCE_HOSTPATH }}
helm_values_persistence_nodename: ${{ secrets.HELM_VALUES_PERSISTENCE_NODENAME }}
helm_set_files: 'configMapData.download=scripts/download.sh'
namespace: ${{ env.NAMESPACE }}
github_token: ${{ secrets.GITHUB_TOKEN }}
1 change: 1 addition & 0 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,6 @@ jobs:
helm_values_env: ${{ secrets.HELM_VALUES_ENV }}
helm_values_persistence_hostpath: ${{ secrets.HELM_VALUES_PERSISTENCE_HOSTPATH }}
helm_values_persistence_nodename: ${{ secrets.HELM_VALUES_PERSISTENCE_NODENAME }}
helm_set_files: 'configMapData.download=scripts/download.sh'
namespace: ${{ env.NAMESPACE }}
github_token: ${{ secrets.GITHUB_TOKEN }}
8 changes: 6 additions & 2 deletions deploy/helm/templates/configmap.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
{{- if .Values.configMapData }}
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "slm-server.fullname" . }}-scripts
labels:
{{- include "slm-server.labels" . | nindent 4 }}
data:
download.sh: |-
{{- .Files.Get "scripts/download.sh" | nindent 4 }}
{{- range $key, $val := .Values.configMapData }}
{{ $key }}: |
{{- $val | nindent 4 }}
{{- end }}
{{- end }}
30 changes: 9 additions & 21 deletions deploy/helm/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,10 @@ spec:
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- if .Values.initContainers }}
initContainers:
- name: download-model
image: busybox:1.28
command: ["sh", "-c", "/scripts/download.sh"]
volumeMounts:
- name: models-storage
mountPath: {{ .Values.persistence.mountPath }}
- name: scripts
mountPath: /scripts
{{- tpl (toYaml .Values.initContainers) . | nindent 8 }}
{{- end }}
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
Expand Down Expand Up @@ -67,20 +62,13 @@ spec:
{{- end }}
resources:
{{- toYaml .Values.resources | nindent 12 }}
{{- if .Values.volumeMounts }}
volumeMounts:
- name: models-storage
mountPath: {{ .Values.persistence.mountPath }}
volumes:
- name: models-storage
{{- if .Values.persistence.enabled }}
persistentVolumeClaim:
claimName: {{ include "slm-server.fullname" . }}
{{- else }}
emptyDir: {}
{{- tpl (toYaml .Values.volumeMounts) . | nindent 12 }}
{{- end }}
- name: scripts
configMap:
name: {{ include "slm-server.fullname" . }}-scripts
defaultMode: 0755
{{- if .Values.volumes }}
volumes:
{{- tpl (toYaml .Values.volumes) . | nindent 8 }}
{{- end }}
strategy:
{{- toYaml .Values.strategy | nindent 4 }}
30 changes: 30 additions & 0 deletions deploy/helm/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -128,5 +128,35 @@ probes:
successThreshold: 1
failureThreshold: 5

# Content is injected at deploy time via: --set-file configMapData.download=scripts/download.sh
# NOTE(review): the configmap template is wrapped in `if .Values.configMapData`, and an
# empty map is falsy in Go templates, so with this default no "-scripts" ConfigMap is
# rendered while the `scripts` volume below still references it. Confirm every deploy
# path passes --set-file.
configMapData: {}

# Volumes and volumeMounts rendered via tpl so template expressions work.
volumes:
# Model files; backed by a PVC named after the chart's fullname helper.
- name: models-storage
persistentVolumeClaim:
claimName: '{{ include "slm-server.fullname" . }}'
# Scripts ConfigMap; the `download` key is exposed as the file download.sh.
- name: scripts
configMap:
name: '{{ include "slm-server.fullname" . }}-scripts'
defaultMode: 0755
items:
- key: download
path: download.sh

# Rendered by the deployment template under the main container's volumeMounts.
volumeMounts:
- name: models-storage
mountPath: /app/models

# Init container that runs the mounted download.sh with MODEL_DIR pointing at the
# models volume, so the script writes into /app/models.
initContainers:
- name: download-model
# NOTE(review): `latest` is a floating tag; consider pinning a specific version.
image: curlimages/curl:latest
command: ["sh", "-c", "MODEL_DIR=/app/models sh /scripts/download.sh"]
volumeMounts:
- name: models-storage
mountPath: /app/models
- name: scripts
mountPath: /scripts

strategy:
type: Recreate
102 changes: 41 additions & 61 deletions scripts/download.sh
Original file line number Diff line number Diff line change
@@ -1,65 +1,45 @@
#!/bin/bash
#!/bin/sh
#
# Download model files for slm-server.
#
# When run inside the init container, MODEL_DIR is set by the caller
# (the init container command defined in the Helm values). For local
# use it defaults to ../models relative to this script.

set -ex
set -e

# Get the absolute path of the directory where the script is located
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)

# Original (official Qwen repo, Q8_0 only):
# https://huggingface.co/Qwen/Qwen3-0.6B-GGUF -> Qwen3-0.6B-Q8_0.gguf
# Switched to second-state community repo for Q4_K_M quantization.
# See README.md "Model Choice" section for rationale.
REPO_URL="https://huggingface.co/second-state/Qwen3-0.6B-GGUF"
# Set model directory relative to the script's location
MODEL_DIR="$SCRIPT_DIR/../models"

# Create the directory if it doesn't exist
MODEL_DIR="${MODEL_DIR:-$(cd -- "$(dirname "$0")" && pwd)/../models}"
mkdir -p "$MODEL_DIR"

# --- Files to download ---
FILES_TO_DOWNLOAD=(
"Qwen3-0.6B-Q4_K_M.gguf"
# Previous default: "Qwen3-0.6B-Q8_0.gguf" (805 MB, from Qwen/Qwen3-0.6B-GGUF)
)

echo "Downloading Qwen3-0.6B-GGUF model and params files..."

for file in "${FILES_TO_DOWNLOAD[@]}"; do
if [ -f "$MODEL_DIR/$file" ]; then
echo "$file already exists, skipping download."
else
echo "Downloading $file..."
wget -P "$MODEL_DIR" "$REPO_URL/resolve/main/$file" || {
echo "Failed to download $file with wget, trying curl..."
curl -L -o "$MODEL_DIR/$file" "$REPO_URL/resolve/main/$file"
}
fi
done

# --- Embedding model: all-MiniLM-L6-v2 (ONNX, quantized UINT8 for AVX2) ---
EMBEDDING_REPO_URL="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
EMBEDDING_MODEL_DIR="$MODEL_DIR/all-MiniLM-L6-v2"

mkdir -p "$EMBEDDING_MODEL_DIR/onnx"

EMBEDDING_FILES=(
"onnx/model_quint8_avx2.onnx"
"tokenizer.json"
)

echo "Downloading all-MiniLM-L6-v2 ONNX embedding model..."

for file in "${EMBEDDING_FILES[@]}"; do
dest="$EMBEDDING_MODEL_DIR/$file"
if [ -f "$dest" ]; then
echo "$file already exists, skipping download."
else
echo "Downloading $file..."
wget -O "$dest" "$EMBEDDING_REPO_URL/resolve/main/$file" || {
echo "Failed to download $file with wget, trying curl..."
curl -L -o "$dest" "$EMBEDDING_REPO_URL/resolve/main/$file"
}
fi
done

echo "Download process complete! Files are in $MODEL_DIR"
# Download one file unless it is already present.
#   $1 - source URL
#   $2 - destination path
# Returns non-zero if curl fails (which aborts the script under `set -e`).
# The transfer is written to "<dest>.part" and renamed only after curl
# succeeds, so an interrupted download never leaves a truncated file that a
# later run would mistake for a complete one and skip.
fetch() {
  fetch_url=$1
  fetch_dest=$2
  if [ -f "$fetch_dest" ]; then
    echo "${fetch_dest##*/} already exists, skipping."
    return 0
  fi
  echo "Downloading ${fetch_dest##*/} ..."
  if ! curl -fSL -o "$fetch_dest.part" "$fetch_url"; then
    rm -f -- "$fetch_dest.part"
    return 1
  fi
  mv -- "$fetch_dest.part" "$fetch_dest"
}

# --- Chat LLM: Qwen3-0.6B (Q4_K_M quantisation from second-state) ---
GGUF_REPO="https://huggingface.co/second-state/Qwen3-0.6B-GGUF"
GGUF_FILE="Qwen3-0.6B-Q4_K_M.gguf"
fetch "$GGUF_REPO/resolve/main/$GGUF_FILE" "$MODEL_DIR/$GGUF_FILE"

# --- Embedding: all-MiniLM-L6-v2 (ONNX, quantized UINT8 for AVX2) ---
EMBED_REPO="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
EMBED_DIR="$MODEL_DIR/all-MiniLM-L6-v2"
mkdir -p "$EMBED_DIR/onnx"
fetch "$EMBED_REPO/resolve/main/tokenizer.json" "$EMBED_DIR/tokenizer.json"

ONNX_FILE="model_quint8_avx2.onnx"
fetch "$EMBED_REPO/resolve/main/onnx/$ONNX_FILE" "$EMBED_DIR/onnx/$ONNX_FILE"

echo "Download complete. Files are in $MODEL_DIR"