You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# Clone repository
git clone https://github.com/OminiX-ai/OminiX-MLX.git
cd OminiX-MLX
# Build all crates
cargo build --release
# Build specific crate
cargo build --release -p qwen3-mlx
LLM Generation
# Download model
huggingface-cli download mlx-community/Qwen3-4B-bf16 --local-dir ./models/Qwen3-4B
# Run text generation
cargo run --release -p qwen3-mlx --example generate_qwen3 -- ./models/Qwen3-4B "Hello, how are you?"
# Run interactive chat
cargo run --release -p qwen3-mlx --example chat_qwen3 -- ./models/Qwen3-4B
use qwen3_mlx::{load_model, Generate, ConcatKeyValueCache};

let mut model = load_model("./models/Qwen3-4B")?;
let mut cache = Vec::new();
let generator = Generate::<ConcatKeyValueCache>::new(&mut model, &mut cache, 0.7, &prompt_tokens);
for token in generator.take(100) {
    let token = token?;
    print!("{}", tokenizer.decode(&[token.item::<u32>()], true)?);
}
Speech Recognition
cd funasr-mlx
# Run transcription
cargo run --release --example transcribe -- \
--model ./models/paraformer \
--audio ./audio/test.wav
use funasr_mlx::{load_model, transcribe, Vocabulary};
use funasr_mlx::audio::{load_wav, resample};

// Load audio
let (samples, rate) = load_wav("audio.wav")?;
let samples = resample(&samples, rate, 16000);

// Load model and transcribe
let mut model = load_model("paraformer.safetensors")?;
let vocab = Vocabulary::load("tokens.txt")?;
let text = transcribe(&mut model, &samples, &vocab)?;
Voice Cloning
cd gpt-sovits-mlx/rust
cargo run --release --example voice_clone -- \
--reference ./audio/reference.wav \
--text "Hello, this is a voice clone."
# Download 8-bit quantized model
huggingface-cli download moxin-org/MiniCPM4-SALA-9B-8bit-mlx --local-dir ./models/MiniCPM-SALA-8bit
# Run text generation
cargo run --release -p minicpm-sala-mlx --example generate -- \
./models/MiniCPM-SALA-8bit "Explain the theory of relativity."
# Run interactive chat
cargo run --release -p minicpm-sala-mlx --example chat -- \
./models/MiniCPM-SALA-8bit --no-think
# Start OpenAI-compatible API server
cargo run --release -p minicpm-sala-mlx --example server -- \
--model ./models/MiniCPM-SALA-8bit --port 8080 --no-think
API Endpoints:
| Method | Endpoint | Description |
|--------|----------|-------------|
| POST | /v1/chat/completions | OpenAI-compatible chat completion |
| GET | /v1/models | List models with metadata (path, size, quantization, loaded status) |
| POST | /v1/models/download | Download a model from HuggingFace |
| DELETE | /v1/models/{id} | Delete a downloaded model |
| GET | /health | Health check |
Example API calls:
# Chat completion
curl http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{ "model": "minicpm-sala-9b", "messages": [{"role": "user", "content": "Hello!"}], "temperature": 0.7, "max_tokens": 256 }'
# Download a model
curl -X POST http://localhost:8080/v1/models/download \
-H "Content-Type: application/json" \
-d '{"repo_id": "moxin-org/MiniCPM4-SALA-9B-8bit-mlx"}'
# List models
curl http://localhost:8080/v1/models
# Delete a model
curl -X DELETE http://localhost:8080/v1/models/MiniCPM4-SALA-9B-8bit-mlx
Qwen3-ASR (Speech Recognition)
# Download model (1.7B 8-bit recommended, 2.46 GB)
huggingface-cli download mlx-community/Qwen3-ASR-1.7B-8bit \
--local-dir ~/.OminiX/models/qwen3-asr-1.7b
# Transcribe audio
cargo run --release -p qwen3-asr-mlx --example transcribe -- audio.wav
# Specify language
cargo run --release -p qwen3-asr-mlx --example transcribe -- audio.wav --language English
# Use 0.6B model (faster, 1.01 GB)
huggingface-cli download mlx-community/Qwen3-ASR-0.6B-8bit \
--local-dir ~/.OminiX/models/qwen3-asr-0.6b
cargo run --release -p qwen3-asr-mlx --example transcribe -- \
~/.OminiX/models/qwen3-asr-0.6b audio.wav
OminiX-API (Unified API Server)
Single HTTP server exposing ASR, TTS, LLM, and OCR through OpenAI-compatible REST endpoints. See the ominix-api README for full documentation.
See the qwen3-tts-mlx README for full API documentation, all synthesis modes, supported speakers/languages, and performance benchmarks.
Image Generation
# Download Z-Image model
huggingface-cli download uqer1244/MLX-z-image --local-dir ./models/zimage-turbo-mlx
# Generate image with Z-Image
cargo run --release -p zimage-mlx --example generate_zimage -- "a cat sitting on a couch"
# Download FLUX.2-klein model
huggingface-cli download black-forest-labs/FLUX.2-klein-4B --local-dir ./models/flux-klein
# Generate image with FLUX.2-klein
cargo run --release -p flux-klein-mlx --example generate_klein -- "a beautiful sunset over mountains"