mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-20 14:22:08 +00:00
Compare commits
base: huggingface:main
huggingface:main
huggingface:add_vlm_chunking
huggingface:add_chunked_atn
huggingface:flashinfer-0.2.5
huggingface:gaudi/add-ci
huggingface:kvrouter-endpoints
huggingface:add_chunked_attn
huggingface:chunked_attn_l4
huggingface:git_v3.2.3
huggingface:git_v3.2.2
huggingface:add_L4
huggingface:aiter_kernels
huggingface:message-more-info
huggingface:improve-tool-call-and-response-ids
huggingface:pr-2982-ci-branch
huggingface:enable-transformers-vlm
huggingface:git_3.2.1
huggingface:debugging-timeouts
huggingface:use_updated_kernels
huggingface:git_3.2.0
huggingface:origin/slind_window_fix
huggingface:no_root_user2
huggingface:no_root_user
huggingface:git_3.1.1
huggingface:tmp_invariants
huggingface:proxy_sse_engine_state
huggingface:pr-2954-ci-branch
huggingface:neuron_backend_ci_test
huggingface:zstd
huggingface:pr-3018-ci-branch
huggingface:pr-3002-ci-branch
huggingface:adjust-mllama-test-output
huggingface:pr-3004-ci-branch
huggingface:triton_fix
huggingface:git_v3.1.0
huggingface:kvrouter
huggingface:vllm/setup
huggingface:gha_sccache_use_secrets
huggingface:add_deepseekv3
huggingface:more_logs
huggingface:fix-tp
huggingface:git_v3.0.2
huggingface:new_minor_version
huggingface:enable-qwen2vl-video
huggingface:response-header-metrics
huggingface:nix/pytorch-2.5.1
huggingface:baichuan2-13b
huggingface:update-jsonschema
huggingface:ci-update_xpu_image
huggingface:pr-2840-ci-branch
huggingface:fix_fp8_llama3.2
huggingface:feat-backend-llamacpp
huggingface:s3-cache
huggingface:git_v3.0.1
huggingface:trtllm/executor_stats
huggingface:git_v3.0.0
huggingface:pr-2784-ci-branch
huggingface:git_v2.4.1
huggingface:ipex-moe
huggingface:pr-2711-ci-branch
huggingface:trtllm-stop-words
huggingface:upgrade-outlines
huggingface:git_v2.4.0
huggingface:auto_length
huggingface:close_dl_thread
huggingface:adjust-where-request-max-tokens-is-defaulted
huggingface:feature/machete
huggingface:add-test-for-warmup-and-kvcache
huggingface:add-rotary-embed-tests
huggingface:bugfix/moe-kernels-imports
huggingface:add-google-cloud-provider
huggingface:add_tunable_prefill
huggingface:enable-non-grammar-constrained-tools
huggingface:git_v2.3.1
huggingface:nix/docker2
huggingface:ci_amd
huggingface:ci_amd4
huggingface:fp8_rocm
huggingface:cuda_ipc_allreduce
huggingface:mllama
huggingface:rocm_6.2_fixes
huggingface:tuna
huggingface:git_v2.3.0
huggingface:nix_integration_tests
huggingface:feature/moe-kernels
huggingface:prefix_chunk
huggingface:pr-2444-ci-branch
huggingface:pr-2517-ci-branch
huggingface:nix_test2
huggingface:feature/vlm-prefix-caching
huggingface:feat/add-load-test
huggingface:upgrade_mlp_speculator
huggingface:avoid-cuda-graph-during-warmup-if-oom
huggingface:fix/op-trace-id
huggingface:prefix_default
huggingface:fix_exl2
huggingface:exl2
huggingface:fix-repack-for-marlin
huggingface:fix-release-tests
huggingface:explore-t4-gemma-issues
huggingface:feature/radix-prefix-cache
huggingface:nix/cargo-clippy
huggingface:pr-2366-ci-branch
huggingface:feature/radix-prefix-cache-bench
huggingface:inlcude-latest-release-on-commit-builds-tags
huggingface:feature/no_repeat_ngram_size_ci
huggingface:fix/parse-mamba-config
huggingface:backends/trtllm-executor
huggingface:pr-2290-ci-runner
huggingface:feature/no_repeat_ngram_size
huggingface:add_api_key
huggingface:use_g6
huggingface:feat/max_queue_size
huggingface:git_v2.2.0
huggingface:debug/gemma2
huggingface:fix_mistral2
huggingface:refactor-lora-linear
huggingface:development-guide
huggingface:feature/prefix
huggingface:ci_amd3
huggingface:experiment/moe
huggingface:fix/allow-top-p-0
huggingface:ci-new-cluster
huggingface:git_v2.1.1
huggingface:add-small-ttft-script
huggingface:test-batch-speedup-amount
huggingface:fp8_kvcache
huggingface:add-chat-response-format
huggingface:fix-mixtral-adapter-loading
huggingface:backends/trtllm
huggingface:git_v2.1.0
huggingface:automodel-supports-flash-paged-attention
huggingface:feat/backend_abstraction
huggingface:ci2
huggingface:temp_work
huggingface:bugfix/phi-exl2
huggingface:multi-lora
huggingface:debug-torch-23
huggingface:feat/backend_feature
huggingface:pr-2076-ci-run
huggingface:maintenance/docker-network
huggingface:ci-patch
huggingface:feat/page_re_alloc
huggingface:feature/phi-3-small
huggingface:debug-gpt2
huggingface:amd-ci-fx
huggingface:ci_amd2
huggingface:support-phi3-small
huggingface:set-num-blocks
huggingface:rocm-ci-build
huggingface:ci-xpu2
huggingface:maintenance/merge-vlm-input-prep
huggingface:fix_phi3
huggingface:precompile-kernels-workflow
huggingface:support-pre-compile-kernels
huggingface:pip-installable
huggingface:flashinfer
huggingface:update_internal_version
huggingface:git_2.0.4
huggingface:pr-1869-ci-run
huggingface:fix-cudagraph-bug
huggingface:fix-version-install
huggingface:improve-dynamic-message-content
huggingface:ci-run-openai-function-calling-compatible-support
huggingface:mi300-temp
huggingface:dummy
huggingface:ci-xpu
huggingface:bugfix/add_tools_prompt
huggingface:martinigoyanes-fix-frequency-penalty
huggingface:add-quickstart-script
huggingface:skip-mistral-test
huggingface:fix_default_arg
huggingface:fix-grammar-cleanup-bug
huggingface:llama-fused-compiled-mlp
huggingface:explore-static-triton-kernels
huggingface:improve_defaults
huggingface:tmp_medusa
huggingface:improve_launcher_defaults
huggingface:op-compilation-benchmarking
huggingface:fix-grammar-fsm-batching
huggingface:tmp_torch_compile
huggingface:test_rocm
huggingface:router-grammar-compile
huggingface:bump-client-0.6.2
huggingface:avoid-zero-seed
huggingface:adding_docs
huggingface:feat/flash_decoding
huggingface:fix-gemma-tokenization
huggingface:ci-amihalik-update-chat-completion-messages
huggingface:add_batch_dimension
huggingface:update_peft
huggingface:add_readme_dashboard
huggingface:revert
huggingface:debug-request-id
huggingface:update_readme
huggingface:mamba2
huggingface:fix_neox_rotary_emb
huggingface:bump-poetry-and-requirements
huggingface:update_docs2
huggingface:support-phi-model
huggingface:fix/avoid_record_streams
huggingface:medusa
huggingface:feat/cuda_12
huggingface:feat/attention_sinks
huggingface:dev
huggingface:speculative
huggingface:self-generating-docs
huggingface:improve-docs
huggingface:fix_leak
huggingface:streaming_conceptual
huggingface:test_docs
huggingface:osanseviero-patch-1
huggingface:model_compat_log
huggingface:compat_logger
huggingface:simpler_exllama
huggingface:bnb4
huggingface:enable_non_divisible_embeddings
huggingface:feat/better_tokens
huggingface:add_integration_test
huggingface:add_gptq_docs
huggingface:quantization
huggingface:remove_post_load_weights
huggingface:feat/improve_max_tokens
huggingface:feat/parse_logs
huggingface:deploy/aml
huggingface:megatron
huggingface:feat/support_deepspeed
huggingface:v3.2.3
huggingface:v3.2.2
huggingface:v3.2.1
huggingface:v3.2.0
huggingface:v3.1.1
huggingface:v3.1.0
huggingface:v3.0.2
huggingface:v3.0.1
huggingface:v3.0.0
huggingface:v2.4.1
huggingface:v2.4.0
huggingface:v2.3.1
huggingface:v2.3.0
huggingface:v2.2.0
huggingface:v2.1.1
huggingface:v2.1.0
huggingface:v2.0.4
huggingface:v2.0.3
huggingface:v2.0.2
huggingface:v2.0.1
huggingface:v2.0.0
huggingface:v1.4.5
huggingface:v1.4.4
huggingface:v1.4.3
huggingface:v1.4.2
huggingface:v1.4.1
huggingface:v1.4.0
huggingface:v1.3.4
huggingface:v1.3.3
huggingface:v1.3.2
huggingface:v1.3.1
huggingface:v1.3.0
huggingface:v1.2.0
huggingface:v1.1.1
huggingface:v1.1.0
huggingface:v1.0.3
huggingface:v1.0.2
huggingface:v1.0.1
huggingface:v1.0.0
huggingface:v0.9.4
huggingface:v0.9.3
huggingface:v0.9.2
huggingface:v0.9.1
huggingface:v0.9.0
huggingface:v0.8.2
huggingface:v0.8.1
huggingface:v0.8.0
huggingface:v0.7.0
huggingface:v0.6.0
huggingface:v0.5.0
huggingface:v0.4.3
huggingface:v0.4.2
huggingface:v0.4.1
huggingface:v0.4.0
huggingface:v0.3.2
huggingface:v0.3.1
huggingface:v0.3.0
huggingface:v0.2.1
huggingface:v0.2.0
...
compare: huggingface:v2.0.2
huggingface:add_vlm_chunking
huggingface:main
huggingface:add_chunked_atn
huggingface:flashinfer-0.2.5
huggingface:gaudi/add-ci
huggingface:kvrouter-endpoints
huggingface:add_chunked_attn
huggingface:chunked_attn_l4
huggingface:git_v3.2.3
huggingface:git_v3.2.2
huggingface:add_L4
huggingface:aiter_kernels
huggingface:message-more-info
huggingface:improve-tool-call-and-response-ids
huggingface:pr-2982-ci-branch
huggingface:enable-transformers-vlm
huggingface:git_3.2.1
huggingface:debugging-timeouts
huggingface:use_updated_kernels
huggingface:git_3.2.0
huggingface:origin/slind_window_fix
huggingface:no_root_user2
huggingface:no_root_user
huggingface:git_3.1.1
huggingface:tmp_invariants
huggingface:proxy_sse_engine_state
huggingface:pr-2954-ci-branch
huggingface:neuron_backend_ci_test
huggingface:zstd
huggingface:pr-3018-ci-branch
huggingface:pr-3002-ci-branch
huggingface:adjust-mllama-test-output
huggingface:pr-3004-ci-branch
huggingface:triton_fix
huggingface:git_v3.1.0
huggingface:kvrouter
huggingface:vllm/setup
huggingface:gha_sccache_use_secrets
huggingface:add_deepseekv3
huggingface:more_logs
huggingface:fix-tp
huggingface:git_v3.0.2
huggingface:new_minor_version
huggingface:enable-qwen2vl-video
huggingface:response-header-metrics
huggingface:nix/pytorch-2.5.1
huggingface:baichuan2-13b
huggingface:update-jsonschema
huggingface:ci-update_xpu_image
huggingface:pr-2840-ci-branch
huggingface:fix_fp8_llama3.2
huggingface:feat-backend-llamacpp
huggingface:s3-cache
huggingface:git_v3.0.1
huggingface:trtllm/executor_stats
huggingface:git_v3.0.0
huggingface:pr-2784-ci-branch
huggingface:git_v2.4.1
huggingface:ipex-moe
huggingface:pr-2711-ci-branch
huggingface:trtllm-stop-words
huggingface:upgrade-outlines
huggingface:git_v2.4.0
huggingface:auto_length
huggingface:close_dl_thread
huggingface:adjust-where-request-max-tokens-is-defaulted
huggingface:feature/machete
huggingface:add-test-for-warmup-and-kvcache
huggingface:add-rotary-embed-tests
huggingface:bugfix/moe-kernels-imports
huggingface:add-google-cloud-provider
huggingface:add_tunable_prefill
huggingface:enable-non-grammar-constrained-tools
huggingface:git_v2.3.1
huggingface:nix/docker2
huggingface:ci_amd
huggingface:ci_amd4
huggingface:fp8_rocm
huggingface:cuda_ipc_allreduce
huggingface:mllama
huggingface:rocm_6.2_fixes
huggingface:tuna
huggingface:git_v2.3.0
huggingface:nix_integration_tests
huggingface:feature/moe-kernels
huggingface:prefix_chunk
huggingface:pr-2444-ci-branch
huggingface:pr-2517-ci-branch
huggingface:nix_test2
huggingface:feature/vlm-prefix-caching
huggingface:feat/add-load-test
huggingface:upgrade_mlp_speculator
huggingface:avoid-cuda-graph-during-warmup-if-oom
huggingface:fix/op-trace-id
huggingface:prefix_default
huggingface:fix_exl2
huggingface:exl2
huggingface:fix-repack-for-marlin
huggingface:fix-release-tests
huggingface:explore-t4-gemma-issues
huggingface:feature/radix-prefix-cache
huggingface:nix/cargo-clippy
huggingface:pr-2366-ci-branch
huggingface:feature/radix-prefix-cache-bench
huggingface:inlcude-latest-release-on-commit-builds-tags
huggingface:feature/no_repeat_ngram_size_ci
huggingface:fix/parse-mamba-config
huggingface:backends/trtllm-executor
huggingface:pr-2290-ci-runner
huggingface:feature/no_repeat_ngram_size
huggingface:add_api_key
huggingface:use_g6
huggingface:feat/max_queue_size
huggingface:git_v2.2.0
huggingface:debug/gemma2
huggingface:fix_mistral2
huggingface:refactor-lora-linear
huggingface:development-guide
huggingface:feature/prefix
huggingface:ci_amd3
huggingface:experiment/moe
huggingface:fix/allow-top-p-0
huggingface:ci-new-cluster
huggingface:git_v2.1.1
huggingface:add-small-ttft-script
huggingface:test-batch-speedup-amount
huggingface:fp8_kvcache
huggingface:add-chat-response-format
huggingface:fix-mixtral-adapter-loading
huggingface:backends/trtllm
huggingface:git_v2.1.0
huggingface:automodel-supports-flash-paged-attention
huggingface:feat/backend_abstraction
huggingface:ci2
huggingface:temp_work
huggingface:bugfix/phi-exl2
huggingface:multi-lora
huggingface:debug-torch-23
huggingface:feat/backend_feature
huggingface:pr-2076-ci-run
huggingface:maintenance/docker-network
huggingface:ci-patch
huggingface:feat/page_re_alloc
huggingface:feature/phi-3-small
huggingface:debug-gpt2
huggingface:amd-ci-fx
huggingface:ci_amd2
huggingface:support-phi3-small
huggingface:set-num-blocks
huggingface:rocm-ci-build
huggingface:ci-xpu2
huggingface:maintenance/merge-vlm-input-prep
huggingface:fix_phi3
huggingface:precompile-kernels-workflow
huggingface:support-pre-compile-kernels
huggingface:pip-installable
huggingface:flashinfer
huggingface:update_internal_version
huggingface:git_2.0.4
huggingface:pr-1869-ci-run
huggingface:fix-cudagraph-bug
huggingface:fix-version-install
huggingface:improve-dynamic-message-content
huggingface:ci-run-openai-function-calling-compatible-support
huggingface:mi300-temp
huggingface:dummy
huggingface:ci-xpu
huggingface:bugfix/add_tools_prompt
huggingface:martinigoyanes-fix-frequency-penalty
huggingface:add-quickstart-script
huggingface:skip-mistral-test
huggingface:fix_default_arg
huggingface:fix-grammar-cleanup-bug
huggingface:llama-fused-compiled-mlp
huggingface:explore-static-triton-kernels
huggingface:improve_defaults
huggingface:tmp_medusa
huggingface:improve_launcher_defaults
huggingface:op-compilation-benchmarking
huggingface:fix-grammar-fsm-batching
huggingface:tmp_torch_compile
huggingface:test_rocm
huggingface:router-grammar-compile
huggingface:bump-client-0.6.2
huggingface:avoid-zero-seed
huggingface:adding_docs
huggingface:feat/flash_decoding
huggingface:fix-gemma-tokenization
huggingface:ci-amihalik-update-chat-completion-messages
huggingface:add_batch_dimension
huggingface:update_peft
huggingface:add_readme_dashboard
huggingface:revert
huggingface:debug-request-id
huggingface:update_readme
huggingface:mamba2
huggingface:fix_neox_rotary_emb
huggingface:bump-poetry-and-requirements
huggingface:update_docs2
huggingface:support-phi-model
huggingface:fix/avoid_record_streams
huggingface:medusa
huggingface:feat/cuda_12
huggingface:feat/attention_sinks
huggingface:dev
huggingface:speculative
huggingface:self-generating-docs
huggingface:improve-docs
huggingface:fix_leak
huggingface:streaming_conceptual
huggingface:test_docs
huggingface:osanseviero-patch-1
huggingface:model_compat_log
huggingface:compat_logger
huggingface:simpler_exllama
huggingface:bnb4
huggingface:enable_non_divisible_embeddings
huggingface:feat/better_tokens
huggingface:add_integration_test
huggingface:add_gptq_docs
huggingface:quantization
huggingface:remove_post_load_weights
huggingface:feat/improve_max_tokens
huggingface:feat/parse_logs
huggingface:deploy/aml
huggingface:megatron
huggingface:feat/support_deepspeed
huggingface:v3.2.3
huggingface:v3.2.2
huggingface:v3.2.1
huggingface:v3.2.0
huggingface:v3.1.1
huggingface:v3.1.0
huggingface:v3.0.2
huggingface:v3.0.1
huggingface:v3.0.0
huggingface:v2.4.1
huggingface:v2.4.0
huggingface:v2.3.1
huggingface:v2.3.0
huggingface:v2.2.0
huggingface:v2.1.1
huggingface:v2.1.0
huggingface:v2.0.4
huggingface:v2.0.3
huggingface:v2.0.2
huggingface:v2.0.1
huggingface:v2.0.0
huggingface:v1.4.5
huggingface:v1.4.4
huggingface:v1.4.3
huggingface:v1.4.2
huggingface:v1.4.1
huggingface:v1.4.0
huggingface:v1.3.4
huggingface:v1.3.3
huggingface:v1.3.2
huggingface:v1.3.1
huggingface:v1.3.0
huggingface:v1.2.0
huggingface:v1.1.1
huggingface:v1.1.0
huggingface:v1.0.3
huggingface:v1.0.2
huggingface:v1.0.1
huggingface:v1.0.0
huggingface:v0.9.4
huggingface:v0.9.3
huggingface:v0.9.2
huggingface:v0.9.1
huggingface:v0.9.0
huggingface:v0.8.2
huggingface:v0.8.1
huggingface:v0.8.0
huggingface:v0.7.0
huggingface:v0.6.0
huggingface:v0.5.0
huggingface:v0.4.3
huggingface:v0.4.2
huggingface:v0.4.1
huggingface:v0.4.0
huggingface:v0.3.2
huggingface:v0.3.1
huggingface:v0.3.0
huggingface:v0.2.1
huggingface:v0.2.0
The selected branch/tag are equal.