# X86
# Set up a Python 3.12 virtual environment with vLLM 0.10.0 and serve
# RedHatAI/gemma-3-27b-it-FP8-dynamic on port 9000 under the name "gemma3".
# Run the steps in order from the directory that should contain ./venv.

# Refresh the package index, then upgrade installed packages. These are two
# separate commands and must be chained: without the "&&" the words
# "sudo apt upgrade -y" are handed to "apt update" as extra arguments and the
# upgrade never runs as its own command.
sudo apt update && sudo apt upgrade -y

# System packages: OpenSSL and libcurl development files, plus the Python 3.12
# interpreter, its venv module, and its development headers.
sudo apt-get install -y \
  libssl-dev \
  libcurl4-openssl-dev \
  python3.12 \
  python3.12-venv \
  python3.12-dev

# Create the virtual environment in ./venv and activate it so the "pip" and
# "vllm" commands below resolve to the ones inside it.
python3.12 -m venv venv
source venv/bin/activate

# Install the pinned PyTorch stack first, then vLLM and its companion packages.
pip install torch==2.7.1 torchaudio==2.7.1 torchvision
pip install xformers flashinfer-python vllm==0.10.0

# Start the vLLM server, listening on all interfaces (0.0.0.0) at port 9000.
# --max-model-len 98304 is 96 * 1024 tokens. See the vLLM CLI reference for
# the exact meaning of each flag in this release.
vllm serve RedHatAI/gemma-3-27b-it-FP8-dynamic \
  --served-model-name gemma3 \
  --host 0.0.0.0 \
  --port 9000 \
  --gpu-memory-utilization 0.9 \
  --tensor-parallel-size 1 \
  --max-model-len 98304 \
  --disable-log-requests \
  --dtype bfloat16 \
  --enable-chunked-prefill \
  --enable-prefix-caching \
  --max-num-batched-tokens 8192 \
  --chat-template-content-format openai