mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
Commit Graph
Select branches
Hide Pull Requests
20250708-ci-fixes
add-chat-response-format
add-google-cloud-provider
add-quickstart-script
add-rotary-embed-tests
add-small-ttft-script
add-test-for-warmup-and-kvcache
add_L4
add_api_key
add_batch_dimension
add_chunked_atn
add_chunked_attn
add_deepseekv3
add_gptq_docs
add_integration_test
add_readme_dashboard
add_tunable_prefill
add_vlm_chunking
adding_docs
adjust-mllama-test-output
adjust-where-request-max-tokens-is-defaulted
aiter_kernels
amd-ci-fx
auto_length
automodel-supports-flash-paged-attention
avoid-cuda-graph-during-warmup-if-oom
avoid-zero-seed
backends/trtllm
backends/trtllm-executor
baichuan2-13b
bnb4
bugfix/add_tools_prompt
bugfix/moe-kernels-imports
bugfix/phi-exl2
bump-client-0.6.2
bump-kernel-versions
bump-poetry-and-requirements
chunked_attn_l4
ci-amihalik-update-chat-completion-messages
ci-new-cluster
ci-patch
ci-run-openai-function-calling-compatible-support
ci-update_xpu_image
ci-xpu
ci-xpu2
ci2
ci_amd
ci_amd2
ci_amd3
ci_amd4
close_dl_thread
compat_logger
cuda_ipc_allreduce
debug-gpt2
debug-request-id
debug-torch-23
debug/gemma2
debugging-timeouts
deploy/aml
dev
development-guide
dummy
enable-non-grammar-constrained-tools
enable-qwen2vl-video
enable-transformers-vlm
enable_non_divisible_embeddings
exl2
experiment/moe
explore-static-triton-kernels
explore-t4-gemma-issues
feat-backend-llamacpp
feat/add-load-test
feat/attention_sinks
feat/backend_abstraction
feat/backend_feature
feat/better_tokens
feat/cuda_12
feat/flash_decoding
feat/improve_max_tokens
feat/max_queue_size
feat/page_re_alloc
feat/parse_logs
feat/support_deepspeed
feature/machete
feature/moe-kernels
feature/no_repeat_ngram_size
feature/no_repeat_ngram_size_ci
feature/phi-3-small
feature/prefix
feature/radix-prefix-cache
feature/radix-prefix-cache-bench
feature/vlm-prefix-caching
fix-cudagraph-bug
fix-gemma-tokenization
fix-grammar-cleanup-bug
fix-grammar-fsm-batching
fix-mixtral-adapter-loading
fix-release-tests
fix-repack-for-marlin
fix-tool-call-def
fix-tp
fix-version-install
fix/allow-top-p-0
fix/avoid_record_streams
fix/op-trace-id
fix/parse-mamba-config
fix_default_arg
fix_exl2
fix_fp8_llama3.2
fix_leak
fix_mistral2
fix_neox_rotary_emb
fix_phi3
flashinfer
flashinfer-0.2.5
fp8_kvcache
fp8_rocm
gaudi_llama4_tmp
gha_sccache_use_secrets
git_2.0.4
git_3.1.1
git_3.2.0
git_3.2.1
git_3.3.0
git_3.3.1
git_3.3.2
git_v2.1.0
git_v2.1.1
git_v2.2.0
git_v2.3.0
git_v2.3.1
git_v2.4.0
git_v2.4.1
git_v3.0.0
git_v3.0.1
git_v3.0.2
git_v3.1.0
git_v3.2.2
git_v3.2.3
git_v3.3.3
git_v3.3.4
git_v3.3.5
improve-docs
improve-dynamic-message-content
improve-json-schema-field
improve-tool-call-and-response-ids
improve_defaults
improve_launcher_defaults
inlcude-latest-release-on-commit-builds-tags
ipex-moe
kvrouter
kvrouter-endpoints
llama-fused-compiled-mlp
main
maintenance/docker-network
maintenance/merge-vlm-input-prep
mamba2
martinigoyanes-fix-frequency-penalty
medusa
megatron
message-more-info
mi300-temp
mllama
model_compat_log
more_logs
multi-lora
new_minor_version
nix/cargo-clippy
nix/docker2
nix/pytorch-2.5.1
nix_integration_tests
nix_test2
no_root_user
no_root_user2
op-compilation-benchmarking
origin/slind_window_fix
osanseviero-patch-1
patch_version_3.3.6
pip-installable
pr-1869-ci-run
pr-2076-ci-run
pr-2290-ci-runner
pr-2366-ci-branch
pr-2444-ci-branch
pr-2517-ci-branch
pr-2711-ci-branch
pr-2784-ci-branch
pr-2840-ci-branch
pr-2954-ci-branch
pr-3002-ci-branch
pr-3004-ci-branch
pr-3018-ci-branch
precompile-kernels-workflow
prefix_chunk
prefix_default
proxy_sse_engine_state
quantization
quantization-0.1
refactor-lora-linear
release-3.2.4
remove_post_load_weights
response-header-metrics
revert
rocm-ci-build
rocm_6.2_fixes
router-grammar-compile
s3-cache
self-generating-docs
set-num-blocks
simpler_exllama
skip-mistral-test
speculative
streaming_conceptual
support-granite-vision
support-logit-bias-in-chat
support-phi-model
support-phi3-small
support-pre-compile-kernels
temp_work
test-batch-speedup-amount
test_docs
test_rocm
tmp_invariants
tmp_medusa
tmp_torch_compile
transformers-ci
triton_fix
trtllm-stop-words
trtllm/executor_stats
tuna
update-jsonschema
update_docs2
update_internal_version
update_peft
update_readme
upgrade-outlines
upgrade_mlp_speculator
use_g6
use_updated_kernels
vllm/setup
zstd
#1
#100
#101
#1010
#1018
#1019
#102
#1022
#1023
#1024
#103
#1033
#1034
#1042
#1044
#1045
#1048
#1049
#1052
#1054
#1058
#1059
#106
#1060
#1061
#1063
#1064
#1065
#1066
#1068
#107
#1070
#1071
#1075
#1076
#1077
#108
#1080
#1081
#1089
#109
#1090
#1091
#1092
#1094
#1096
#1097
#1099
#11
#110
#1100
#1101
#1102
#1103
#1105
#1110
#1112
#1116
#1123
#1128
#1134
#114
#1140
#1141
#115
#1153
#1155
#116
#1165
#1165
#117
#1173
#1176
#1178
#1179
#118
#1182
#1183
#1184
#1187
#119
#1198
#1202
#1211
#1214
#1219
#122
#1224
#1228
#123
#1239
#1241
#1242
#1243
#1246
#1252
#126
#1260
#1267
#1270
#1272
#1274
#1276
#1279
#128
#1285
#1287
#129
#1294
#1295
#13
#130
#1301
#1305
#1307
#1308
#1313
#132
#1326
#1328
#133
#1336
#1337
#134
#1341
#1343
#1346
#1347
#1348
#135
#1351
#1352
#1353
#1358
#136
#1361
#1364
#137
#1370
#1373
#138
#1381
#1386
#139
#1390
#1395
#14
#140
#1408
#141
#1414
#1419
#142
#1420
#1424
#1425
#1427
#1428
#143
#1436
#144
#1442
#1448
#145
#1450
#1453
#1454
#1455
#1459
#1461
#1462
#1463
#1469
#147
#1470
#1471
#1473
#1475
#1476
#1477
#1478
#148
#1480
#1484
#1486
#1488
#1489
#149
#1490
#1491
#1492
#1494
#1495
#1496
#1497
#1498
#15
#150
#1502
#1504
#1505
#1506
#151
#1511
#1512
#1514
#1515
#1516
#1517
#1518
#152
#1520
#1523
#1524
#1526
#1527
#153
#1532
#1533
#1537
#1539
#154
#1540
#1541
#1542
#1543
#155
#1550
#1552
#1555
#1556
#1557
#1560
#1563
#1564
#1567
#1568
#1569
#1570
#1571
#1576
#1577
#1578
#1579
#1580
#1583
#1584
#1585
#1586
#1587
#1588
#159
#1591
#1592
#1594
#16
#160
#1603
#1605
#1606
#1607
#1608
#1609
#161
#1610
#1614
#1617
#1618
#1619
#162
#1621
#1626
#1628
#163
#1632
#1637
#1638
#1639
#164
#1646
#1648
#1650
#1651
#1653
#1658
#1660
#1662
#1663
#1664
#1666
#1667
#1668
#167
#1676
#168
#1682
#1685
#1686
#1693
#1697
#1698
#17
#170
#1702
#1703
#1704
#1707
#1708
#1709
#1710
#1713
#1714
#1715
#1716
#1718
#1719
#1726
#1727
#1729
#173
#1730
#1731
#1734
#1735
#1736
#1737
#1739
#174
#1740
#1747
#1748
#1749
#175
#1751
#1755
#1756
#1758
#1759
#1760
#1764
#1765
#1767
#1768
#1773
#1774
#1775
#178
#1784
#1789
#179
#1790
#1791
#1797
#1798
#18
#180
#1800
#1801
#1808
#181
#1811
#1812
#1813
#1815
#1816
#1817
#1818
#1820
#1825
#1827
#1828
#1829
#183
#1830
#1832
#1833
#1835
#1836
#1839
#184
#1840
#1841
#1843
#1844
#1845
#1848
#1849
#185
#1850
#1851
#1854
#1855
#186
#1860
#1865
#1866
#1869
#187
#1882
#1884
#1886
#1888
#1889
#1890
#1892
#1894
#1895
#1898
#19
#190
#1902
#1906
#1908
#1909
#191
#1910
#1912
#1915
#1916
#1917
#1918
#1919
#1920
#1921
#1923
#1924
#1925
#1929
#193
#1931
#1932
#1934
#1935
#1936
#1937
#1938
#1939
#194
#1940
#1942
#1947
#1948
#1949
#1950
#1951
#1953
#1954
#1958
#1959
#196
#1963
#1965
#1967
#1970
#1971
#1975
#1980
#1981
#1985
#1986
#1988
#1989
#1990
#1994
#1995
#1996
#2
#20
#200
#2002
#2003
#2004
#2005
#2006
#2008
#201
#2010
#2011
#2013
#2014
#2015
#2016
#2017
#202
#2020
#2021
#2022
#2023
#2024
#2028
#203
#2031
#2032
#2033
#2034
#2038
#2039
#2044
#2045
#2046
#2047
#2049
#205
#2050
#2052
#2054
#2059
#2060
#2061
#2062
#2063
#2065
#2066
#2068
#207
#2071
#2072
#2074
#2075
#2076
#2078
#2079
#208
#2080
#2084
#2085
#2086
#2087
#2088
#2089
#2091
#2092
#2094
#2095
#2097
#2099
#210
#2101
#2102
#2103
#2104
#2105
#2109
#2110
#2111
#2114
#2115
#2116
#2117
#2118
#2119
#212
#2120
#2123
#2124
#2125
#2127
#2128
#2129
#213
#2131
#2132
#2133
#2134
#2135
#2137
#2138
#214
#2140
#2141
#2142
#2148
#2149
#215
#2151
#2152
#2153
#2155
#2156
#2158
#216
#2161
#2163
#2164
#2165
#2166
#2167
#2168
#2169
#217
#2170
#2173
#2175
#2176
#2178
#2179
#218
#2180
#2181
#2182
#2186
#2187
#2189
#219
#2190
#2191
#2193
#2194
#2196
#22
#220
#2201
#2202
#2203
#2204
#2205
#2208
#2209
#221
#2212
#2213
#2215
#2216
#2217
#222
#2220
#2221
#2224
#2225
#2226
#2228
#2230
#2231
#2232
#2233
#2237
#2242
#2243
#2244
#2245
#2248
#2249
#2250
#2251
#2254
#2255
#2256
#2257
#2258
#2259
#226
#2260
#2261
#2262
#2266
#2268
#2269
#227
#2271
#2272
#2273
#2274
#2276
#2277
#2278
#2279
#228
#2281
#2282
#2283
#2284
#2285
#2286
#2287
#2288
#2289
#2290
#2291
#2292
#2298
#2299
#23
#2300
#2303
#2304
#2306
#2307
#2308
#2309
#2311
#2311
#2313
#2314
#2315
#2317
#2320
#2323
#2325
#2326
#2327
#2328
#2329
#233
#2330
#2331
#2333
#2335
#2336
#2337
#2338
#2339
#234
#2341
#2342
#2343
#2344
#2345
#2346
#2347
#235
#2350
#2351
#2352
#2353
#2354
#2357
#2358
#2359
#2360
#2361
#2364
#2365
#2366
#2367
#2368
#237
#2370
#2371
#2372
#2374
#2377
#2378
#2379
#2381
#2382
#2384
#2385
#2386
#2387
#2389
#2390
#2391
#2392
#2394
#2395
#2396
#2397
#2398
#2399
#24
#2401
#2402
#2403
#2404
#2405
#2406
#2407
#2408
#2410
#2411
#2412
#2414
#2415
#2416
#2419
#242
#2420
#2422
#2423
#2424
#2426
#2427
#2428
#2429
#2430
#2431
#2433
#2437
#2438
#2439
#244
#2442
#2443
#2444
#2449
#2450
#2451
#2453
#2454
#2455
#2459
#246
#2462
#2463
#2468
#2469
#2470
#2471
#2472
#2473
#2477
#2478
#2479
#248
#2481
#2482
#2484
#2486
#2489
#2490
#2491
#2492
#2493
#2494
#2496
#2497
#2498
#2499
#25
#250
#2500
#2501
#2507
#251
#2510
#2511
#2512
#2513
#2514
#2515
#2516
#2517
#2518
#2519
#252
#2520
#2521
#2524
#2525
#2527
#2528
#2529
#2532
#2533
#2535
#2536
#2537
#2538
#2539
#2540
#2545
#2546
#2547
#2548
#255
#2550
#2551
#2552
#2553
#2554
#2555
#2556
#2557
#2558
#2561
#2562
#2563
#2566
#2567
#2568
#257
#2574
#2575
#2577
#2578
#2579
#258
#2580
#2582
#2585
#2586
#2587
#2588
#2589
#259
#2590
#2591
#2592
#2594
#2595
#2596
#2597
#2599
#26
#2600
#2601
#2602
#2603
#2604
#2605
#2606
#2607
#2609
#261
#2610
#2611
#2612
#2614
#2616
#2617
#2619
#262
#2620
#2622
#2623
#2625
#2627
#2628
#2629
#2630
#2631
#2632
#2633
#2634
#2637
#264
#2640
#2642
#2642
#2645
#2646
#2647
#2648
#2650
#2651
#2652
#2655
#2658
#2659
#266
#2661
#2663
#2664
#2665
#2666
#2668
#267
#2673
#2674
#2677
#2678
#2680
#2682
#2683
#2684
#2685
#2686
#2687
#2688
#2689
#269
#2690
#2691
#2692
#2693
#2694
#2695
#2697
#2698
#2699
#27
#2701
#2702
#2704
#2706
#2707
#2708
#2709
#2710
#2711
#2712
#2713
#2714
#2716
#2717
#2718
#2719
#272
#2720
#2721
#2723
#2724
#2725
#2726
#2732
#2733
#2738
#274
#2740
#2741
#2742
#2743
#2745
#2746
#275
#2750
#2751
#2753
#2754
#2755
#2756
#2756
#2758
#276
#2760
#2761
#2762
#2764
#2765
#2766
#2767
#2768
#2769
#277
#2770
#2771
#2772
#2773
#2774
#2777
#2778
#2779
#278
#2782
#2784
#2785
#2786
#2788
#2789
#2790
#2791
#2792
#2793
#2795
#2796
#2797
#2798
#2799
#28
#2802
#2803
#2806
#2807
#2808
#2809
#2810
#2811
#2812
#2814
#2815
#2816
#2817
#2818
#282
#2824
#2825
#2826
#2827
#2829
#2831
#2833
#2834
#2837
#284
#2840
#2841
#2845
#2846
#2848
#2848
#2849
#285
#2850
#2852
#2855
#2856
#2858
#2859
#286
#2861
#2862
#2863
#2866
#2866
#287
#2870
#2874
#2874
#2878
#2882
#2883
#2884
#2885
#2886
#2891
#2892
#2893
#2898
#29
#2901
#2903
#2904
#2905
#2907
#2908
#2910
#2911
#2912
#2913
#2916
#2917
#2918
#2919
#292
#2921
#2922
#2924
#2928
#2929
#2931
#2932
#2933
#2935
#2936
#2937
#2938
#294
#2940
#2941
#2942
#2943
#2945
#2947
#2948
#2949
#2950
#2951
#2953
#2954
#2957
#2958
#2960
#2961
#2962
#2963
#2964
#2964
#2965
#2965
#2966
#2967
#2968
#297
#2970
#2971
#2972
#2975
#2976
#2976
#2977
#2979
#298
#2980
#2981
#2982
#2983
#2988
#299
#2990
#2991
#2992
#2993
#2995
#2996
#2998
#2999
#30
#3000
#3001
#3002
#3003
#3004
#3006
#3007
#3008
#3010
#3012
#3012
#3013
#3015
#3016
#3017
#3018
#302
#3021
#3022
#3024
#3027
#3028
#3029
#303
#3030
#3031
#3032
#3033
#3034
#3034
#3035
#3036
#3037
#3039
#304
#3040
#3041
#3042
#3044
#3045
#3046
#3047
#3048
#3049
#305
#3051
#3053
#3054
#3055
#3056
#3057
#3060
#3061
#3063
#3063
#3064
#3064
#3065
#3065
#3067
#3068
#3069
#3072
#3073
#3074
#3075
#3076
#3077
#3078
#3079
#3079
#308
#3080
#3081
#3083
#3084
#3085
#3086
#3089
#3091
#3092
#3093
#3094
#3095
#3096
#3098
#3099
#31
#310
#3100
#3101
#3103
#3106
#3107
#3108
#3109
#3111
#3111
#3112
#3113
#3116
#3117
#3118
#3120
#3121
#3122
#3124
#3125
#3126
#3127
#3128
#3129
#313
#3131
#3132
#3134
#3136
#3141
#3142
#3144
#3145
#3147
#3148
#3150
#3151
#3152
#3154
#3154
#3156
#3157
#3160
#3161
#3162
#3162
#3164
#3164
#3166
#3166
#3167
#317
#3170
#3171
#3172
#3174
#3176
#3179
#318
#3182
#3184
#3186
#3186
#3187
#3188
#3189
#3193
#3194
#3194
#3196
#3197
#3197
#32
#3200
#3201
#3201
#3202
#3204
#3206
#3207
#3210
#3210
#3211
#3217
#3218
#3220
#3221
#3222
#3223
#3224
#3229
#3230
#3231
#3231
#3232
#3234
#3235
#3236
#3237
#3238
#3239
#3240
#3242
#3244
#3245
#3246
#3249
#325
#3252
#3253
#3254
#3255
#3255
#3256
#3258
#3260
#3261
#3262
#3263
#3264
#3265
#3266
#3266
#3267
#3268
#3269
#327
#3270
#3271
#3273
#3274
#3276
#3276
#328
#3280
#3281
#3282
#3283
#3284
#3286
#3287
#3288
#3288
#329
#3291
#3291
#3292
#3292
#3297
#3298
#33
#3300
#3300
#3302
#3302
#3308
#3310
#3310
#3311
#3312
#3313
#3314
#3315
#3315
#3319
#3322
#3323
#3324
#3325
#3326
#3327
#334
#335
#336
#34
#340
#341
#343
#344
#348
#35
#351
#352
#353
#356
#357
#358
#359
#36
#360
#362
#363
#364
#367
#368
#37
#370
#373
#379
#384
#385
#388
#39
#393
#394
#395
#396
#4
#40
#400
#404
#406
#407
#41
#411
#412
#42
#434
#438
#44
#441
#443
#45
#453
#46
#462
#465
#47
#470
#472
#475
#477
#48
#480
#483
#485
#488
#49
#498
#5
#50
#501
#502
#51
#513
#514
#516
#519
#52
#520
#521
#522
#525
#529
#53
#534
#54
#543
#544
#545
#55
#550
#553
#557
#558
#56
#561
#562
#567
#57
#575
#578
#579
#58
#580
#581
#582
#583
#585
#586
#587
#588
#59
#590
#595
#596
#6
#60
#600
#605
#608
#609
#61
#611
#616
#617
#618
#619
#62
#621
#623
#624
#626
#63
#630
#633
#634
#635
#639
#64
#642
#643
#647
#648
#659
#66
#661
#664
#665
#666
#67
#670
#671
#678
#68
#684
#689
#698
#7
#70
#704
#708
#71
#712
#713
#715
#719
#72
#721
#723
#725
#727
#73
#733
#737
#738
#740
#741
#743
#745
#746
#748
#75
#750
#76
#761
#762
#767
#768
#770
#773
#783
#785
#789
#791
#793
#794
#795
#797
#798
#799
#8
#803
#805
#806
#809
#810
#812
#82
#820
#821
#822
#823
#829
#831
#836
#838
#84
#842
#848
#85
#851
#852
#853
#854
#858
#86
#860
#862
#867
#868
#87
#872
#88
#881
#884
#886
#889
#89
#892
#893
#898
#9
#90
#900
#901
#905
#906
#91
#910
#911
#918
#921
#93
#930
#932
#935
#94
#941
#947
#95
#950
#951
#953
#954
#957
#958
#96
#963
#964
#966
#968
#97
#971
#977
#981
#986
#989
#990
#993
#994
#999
v0.2.0
v0.2.1
v0.3.0
v0.3.1
v0.3.2
v0.4.0
v0.4.1
v0.4.2
v0.4.3
v0.5.0
v0.6.0
v0.7.0
v0.8.0
v0.8.1
v0.8.2
v0.9.0
v0.9.1
v0.9.2
v0.9.3
v0.9.4
v1.0.0
v1.0.1
v1.0.2
v1.0.3
v1.1.0
v1.1.1
v1.2.0
v1.3.0
v1.3.1
v1.3.2
v1.3.3
v1.3.4
v1.4.0
v1.4.1
v1.4.2
v1.4.3
v1.4.4
v1.4.5
v2.0.0
v2.0.1
v2.0.2
v2.0.3
v2.0.4
v2.1.0
v2.1.1
v2.2.0
v2.3.0
v2.3.1
v2.4.0
v2.4.1
v3.0.0
v3.0.1
v3.0.2
v3.1.0
v3.1.1
v3.2.0
v3.2.1
v3.2.2
v3.2.3
v3.3.0
v3.3.1
v3.3.2
v3.3.3
v3.3.4
v3.3.5
Select branches
Hide Pull Requests
20250708-ci-fixes
add-chat-response-format
add-google-cloud-provider
add-quickstart-script
add-rotary-embed-tests
add-small-ttft-script
add-test-for-warmup-and-kvcache
add_L4
add_api_key
add_batch_dimension
add_chunked_atn
add_chunked_attn
add_deepseekv3
add_gptq_docs
add_integration_test
add_readme_dashboard
add_tunable_prefill
add_vlm_chunking
adding_docs
adjust-mllama-test-output
adjust-where-request-max-tokens-is-defaulted
aiter_kernels
amd-ci-fx
auto_length
automodel-supports-flash-paged-attention
avoid-cuda-graph-during-warmup-if-oom
avoid-zero-seed
backends/trtllm
backends/trtllm-executor
baichuan2-13b
bnb4
bugfix/add_tools_prompt
bugfix/moe-kernels-imports
bugfix/phi-exl2
bump-client-0.6.2
bump-kernel-versions
bump-poetry-and-requirements
chunked_attn_l4
ci-amihalik-update-chat-completion-messages
ci-new-cluster
ci-patch
ci-run-openai-function-calling-compatible-support
ci-update_xpu_image
ci-xpu
ci-xpu2
ci2
ci_amd
ci_amd2
ci_amd3
ci_amd4
close_dl_thread
compat_logger
cuda_ipc_allreduce
debug-gpt2
debug-request-id
debug-torch-23
debug/gemma2
debugging-timeouts
deploy/aml
dev
development-guide
dummy
enable-non-grammar-constrained-tools
enable-qwen2vl-video
enable-transformers-vlm
enable_non_divisible_embeddings
exl2
experiment/moe
explore-static-triton-kernels
explore-t4-gemma-issues
feat-backend-llamacpp
feat/add-load-test
feat/attention_sinks
feat/backend_abstraction
feat/backend_feature
feat/better_tokens
feat/cuda_12
feat/flash_decoding
feat/improve_max_tokens
feat/max_queue_size
feat/page_re_alloc
feat/parse_logs
feat/support_deepspeed
feature/machete
feature/moe-kernels
feature/no_repeat_ngram_size
feature/no_repeat_ngram_size_ci
feature/phi-3-small
feature/prefix
feature/radix-prefix-cache
feature/radix-prefix-cache-bench
feature/vlm-prefix-caching
fix-cudagraph-bug
fix-gemma-tokenization
fix-grammar-cleanup-bug
fix-grammar-fsm-batching
fix-mixtral-adapter-loading
fix-release-tests
fix-repack-for-marlin
fix-tool-call-def
fix-tp
fix-version-install
fix/allow-top-p-0
fix/avoid_record_streams
fix/op-trace-id
fix/parse-mamba-config
fix_default_arg
fix_exl2
fix_fp8_llama3.2
fix_leak
fix_mistral2
fix_neox_rotary_emb
fix_phi3
flashinfer
flashinfer-0.2.5
fp8_kvcache
fp8_rocm
gaudi_llama4_tmp
gha_sccache_use_secrets
git_2.0.4
git_3.1.1
git_3.2.0
git_3.2.1
git_3.3.0
git_3.3.1
git_3.3.2
git_v2.1.0
git_v2.1.1
git_v2.2.0
git_v2.3.0
git_v2.3.1
git_v2.4.0
git_v2.4.1
git_v3.0.0
git_v3.0.1
git_v3.0.2
git_v3.1.0
git_v3.2.2
git_v3.2.3
git_v3.3.3
git_v3.3.4
git_v3.3.5
improve-docs
improve-dynamic-message-content
improve-json-schema-field
improve-tool-call-and-response-ids
improve_defaults
improve_launcher_defaults
inlcude-latest-release-on-commit-builds-tags
ipex-moe
kvrouter
kvrouter-endpoints
llama-fused-compiled-mlp
main
maintenance/docker-network
maintenance/merge-vlm-input-prep
mamba2
martinigoyanes-fix-frequency-penalty
medusa
megatron
message-more-info
mi300-temp
mllama
model_compat_log
more_logs
multi-lora
new_minor_version
nix/cargo-clippy
nix/docker2
nix/pytorch-2.5.1
nix_integration_tests
nix_test2
no_root_user
no_root_user2
op-compilation-benchmarking
origin/slind_window_fix
osanseviero-patch-1
patch_version_3.3.6
pip-installable
pr-1869-ci-run
pr-2076-ci-run
pr-2290-ci-runner
pr-2366-ci-branch
pr-2444-ci-branch
pr-2517-ci-branch
pr-2711-ci-branch
pr-2784-ci-branch
pr-2840-ci-branch
pr-2954-ci-branch
pr-3002-ci-branch
pr-3004-ci-branch
pr-3018-ci-branch
precompile-kernels-workflow
prefix_chunk
prefix_default
proxy_sse_engine_state
quantization
quantization-0.1
refactor-lora-linear
release-3.2.4
remove_post_load_weights
response-header-metrics
revert
rocm-ci-build
rocm_6.2_fixes
router-grammar-compile
s3-cache
self-generating-docs
set-num-blocks
simpler_exllama
skip-mistral-test
speculative
streaming_conceptual
support-granite-vision
support-logit-bias-in-chat
support-phi-model
support-phi3-small
support-pre-compile-kernels
temp_work
test-batch-speedup-amount
test_docs
test_rocm
tmp_invariants
tmp_medusa
tmp_torch_compile
transformers-ci
triton_fix
trtllm-stop-words
trtllm/executor_stats
tuna
update-jsonschema
update_docs2
update_internal_version
update_peft
update_readme
upgrade-outlines
upgrade_mlp_speculator
use_g6
use_updated_kernels
vllm/setup
zstd
#1
#100
#101
#1010
#1018
#1019
#102
#1022
#1023
#1024
#103
#1033
#1034
#1042
#1044
#1045
#1048
#1049
#1052
#1054
#1058
#1059
#106
#1060
#1061
#1063
#1064
#1065
#1066
#1068
#107
#1070
#1071
#1075
#1076
#1077
#108
#1080
#1081
#1089
#109
#1090
#1091
#1092
#1094
#1096
#1097
#1099
#11
#110
#1100
#1101
#1102
#1103
#1105
#1110
#1112
#1116
#1123
#1128
#1134
#114
#1140
#1141
#115
#1153
#1155
#116
#1165
#1165
#117
#1173
#1176
#1178
#1179
#118
#1182
#1183
#1184
#1187
#119
#1198
#1202
#1211
#1214
#1219
#122
#1224
#1228
#123
#1239
#1241
#1242
#1243
#1246
#1252
#126
#1260
#1267
#1270
#1272
#1274
#1276
#1279
#128
#1285
#1287
#129
#1294
#1295
#13
#130
#1301
#1305
#1307
#1308
#1313
#132
#1326
#1328
#133
#1336
#1337
#134
#1341
#1343
#1346
#1347
#1348
#135
#1351
#1352
#1353
#1358
#136
#1361
#1364
#137
#1370
#1373
#138
#1381
#1386
#139
#1390
#1395
#14
#140
#1408
#141
#1414
#1419
#142
#1420
#1424
#1425
#1427
#1428
#143
#1436
#144
#1442
#1448
#145
#1450
#1453
#1454
#1455
#1459
#1461
#1462
#1463
#1469
#147
#1470
#1471
#1473
#1475
#1476
#1477
#1478
#148
#1480
#1484
#1486
#1488
#1489
#149
#1490
#1491
#1492
#1494
#1495
#1496
#1497
#1498
#15
#150
#1502
#1504
#1505
#1506
#151
#1511
#1512
#1514
#1515
#1516
#1517
#1518
#152
#1520
#1523
#1524
#1526
#1527
#153
#1532
#1533
#1537
#1539
#154
#1540
#1541
#1542
#1543
#155
#1550
#1552
#1555
#1556
#1557
#1560
#1563
#1564
#1567
#1568
#1569
#1570
#1571
#1576
#1577
#1578
#1579
#1580
#1583
#1584
#1585
#1586
#1587
#1588
#159
#1591
#1592
#1594
#16
#160
#1603
#1605
#1606
#1607
#1608
#1609
#161
#1610
#1614
#1617
#1618
#1619
#162
#1621
#1626
#1628
#163
#1632
#1637
#1638
#1639
#164
#1646
#1648
#1650
#1651
#1653
#1658
#1660
#1662
#1663
#1664
#1666
#1667
#1668
#167
#1676
#168
#1682
#1685
#1686
#1693
#1697
#1698
#17
#170
#1702
#1703
#1704
#1707
#1708
#1709
#1710
#1713
#1714
#1715
#1716
#1718
#1719
#1726
#1727
#1729
#173
#1730
#1731
#1734
#1735
#1736
#1737
#1739
#174
#1740
#1747
#1748
#1749
#175
#1751
#1755
#1756
#1758
#1759
#1760
#1764
#1765
#1767
#1768
#1773
#1774
#1775
#178
#1784
#1789
#179
#1790
#1791
#1797
#1798
#18
#180
#1800
#1801
#1808
#181
#1811
#1812
#1813
#1815
#1816
#1817
#1818
#1820
#1825
#1827
#1828
#1829
#183
#1830
#1832
#1833
#1835
#1836
#1839
#184
#1840
#1841
#1843
#1844
#1845
#1848
#1849
#185
#1850
#1851
#1854
#1855
#186
#1860
#1865
#1866
#1869
#187
#1882
#1884
#1886
#1888
#1889
#1890
#1892
#1894
#1895
#1898
#19
#190
#1902
#1906
#1908
#1909
#191
#1910
#1912
#1915
#1916
#1917
#1918
#1919
#1920
#1921
#1923
#1924
#1925
#1929
#193
#1931
#1932
#1934
#1935
#1936
#1937
#1938
#1939
#194
#1940
#1942
#1947
#1948
#1949
#1950
#1951
#1953
#1954
#1958
#1959
#196
#1963
#1965
#1967
#1970
#1971
#1975
#1980
#1981
#1985
#1986
#1988
#1989
#1990
#1994
#1995
#1996
#2
#20
#200
#2002
#2003
#2004
#2005
#2006
#2008
#201
#2010
#2011
#2013
#2014
#2015
#2016
#2017
#202
#2020
#2021
#2022
#2023
#2024
#2028
#203
#2031
#2032
#2033
#2034
#2038
#2039
#2044
#2045
#2046
#2047
#2049
#205
#2050
#2052
#2054
#2059
#2060
#2061
#2062
#2063
#2065
#2066
#2068
#207
#2071
#2072
#2074
#2075
#2076
#2078
#2079
#208
#2080
#2084
#2085
#2086
#2087
#2088
#2089
#2091
#2092
#2094
#2095
#2097
#2099
#210
#2101
#2102
#2103
#2104
#2105
#2109
#2110
#2111
#2114
#2115
#2116
#2117
#2118
#2119
#212
#2120
#2123
#2124
#2125
#2127
#2128
#2129
#213
#2131
#2132
#2133
#2134
#2135
#2137
#2138
#214
#2140
#2141
#2142
#2148
#2149
#215
#2151
#2152
#2153
#2155
#2156
#2158
#216
#2161
#2163
#2164
#2165
#2166
#2167
#2168
#2169
#217
#2170
#2173
#2175
#2176
#2178
#2179
#218
#2180
#2181
#2182
#2186
#2187
#2189
#219
#2190
#2191
#2193
#2194
#2196
#22
#220
#2201
#2202
#2203
#2204
#2205
#2208
#2209
#221
#2212
#2213
#2215
#2216
#2217
#222
#2220
#2221
#2224
#2225
#2226
#2228
#2230
#2231
#2232
#2233
#2237
#2242
#2243
#2244
#2245
#2248
#2249
#2250
#2251
#2254
#2255
#2256
#2257
#2258
#2259
#226
#2260
#2261
#2262
#2266
#2268
#2269
#227
#2271
#2272
#2273
#2274
#2276
#2277
#2278
#2279
#228
#2281
#2282
#2283
#2284
#2285
#2286
#2287
#2288
#2289
#2290
#2291
#2292
#2298
#2299
#23
#2300
#2303
#2304
#2306
#2307
#2308
#2309
#2311
#2311
#2313
#2314
#2315
#2317
#2320
#2323
#2325
#2326
#2327
#2328
#2329
#233
#2330
#2331
#2333
#2335
#2336
#2337
#2338
#2339
#234
#2341
#2342
#2343
#2344
#2345
#2346
#2347
#235
#2350
#2351
#2352
#2353
#2354
#2357
#2358
#2359
#2360
#2361
#2364
#2365
#2366
#2367
#2368
#237
#2370
#2371
#2372
#2374
#2377
#2378
#2379
#2381
#2382
#2384
#2385
#2386
#2387
#2389
#2390
#2391
#2392
#2394
#2395
#2396
#2397
#2398
#2399
#24
#2401
#2402
#2403
#2404
#2405
#2406
#2407
#2408
#2410
#2411
#2412
#2414
#2415
#2416
#2419
#242
#2420
#2422
#2423
#2424
#2426
#2427
#2428
#2429
#2430
#2431
#2433
#2437
#2438
#2439
#244
#2442
#2443
#2444
#2449
#2450
#2451
#2453
#2454
#2455
#2459
#246
#2462
#2463
#2468
#2469
#2470
#2471
#2472
#2473
#2477
#2478
#2479
#248
#2481
#2482
#2484
#2486
#2489
#2490
#2491
#2492
#2493
#2494
#2496
#2497
#2498
#2499
#25
#250
#2500
#2501
#2507
#251
#2510
#2511
#2512
#2513
#2514
#2515
#2516
#2517
#2518
#2519
#252
#2520
#2521
#2524
#2525
#2527
#2528
#2529
#2532
#2533
#2535
#2536
#2537
#2538
#2539
#2540
#2545
#2546
#2547
#2548
#255
#2550
#2551
#2552
#2553
#2554
#2555
#2556
#2557
#2558
#2561
#2562
#2563
#2566
#2567
#2568
#257
#2574
#2575
#2577
#2578
#2579
#258
#2580
#2582
#2585
#2586
#2587
#2588
#2589
#259
#2590
#2591
#2592
#2594
#2595
#2596
#2597
#2599
#26
#2600
#2601
#2602
#2603
#2604
#2605
#2606
#2607
#2609
#261
#2610
#2611
#2612
#2614
#2616
#2617
#2619
#262
#2620
#2622
#2623
#2625
#2627
#2628
#2629
#2630
#2631
#2632
#2633
#2634
#2637
#264
#2640
#2642
#2642
#2645
#2646
#2647
#2648
#2650
#2651
#2652
#2655
#2658
#2659
#266
#2661
#2663
#2664
#2665
#2666
#2668
#267
#2673
#2674
#2677
#2678
#2680
#2682
#2683
#2684
#2685
#2686
#2687
#2688
#2689
#269
#2690
#2691
#2692
#2693
#2694
#2695
#2697
#2698
#2699
#27
#2701
#2702
#2704
#2706
#2707
#2708
#2709
#2710
#2711
#2712
#2713
#2714
#2716
#2717
#2718
#2719
#272
#2720
#2721
#2723
#2724
#2725
#2726
#2732
#2733
#2738
#274
#2740
#2741
#2742
#2743
#2745
#2746
#275
#2750
#2751
#2753
#2754
#2755
#2756
#2756
#2758
#276
#2760
#2761
#2762
#2764
#2765
#2766
#2767
#2768
#2769
#277
#2770
#2771
#2772
#2773
#2774
#2777
#2778
#2779
#278
#2782
#2784
#2785
#2786
#2788
#2789
#2790
#2791
#2792
#2793
#2795
#2796
#2797
#2798
#2799
#28
#2802
#2803
#2806
#2807
#2808
#2809
#2810
#2811
#2812
#2814
#2815
#2816
#2817
#2818
#282
#2824
#2825
#2826
#2827
#2829
#2831
#2833
#2834
#2837
#284
#2840
#2841
#2845
#2846
#2848
#2848
#2849
#285
#2850
#2852
#2855
#2856
#2858
#2859
#286
#2861
#2862
#2863
#2866
#2866
#287
#2870
#2874
#2874
#2878
#2882
#2883
#2884
#2885
#2886
#2891
#2892
#2893
#2898
#29
#2901
#2903
#2904
#2905
#2907
#2908
#2910
#2911
#2912
#2913
#2916
#2917
#2918
#2919
#292
#2921
#2922
#2924
#2928
#2929
#2931
#2932
#2933
#2935
#2936
#2937
#2938
#294
#2940
#2941
#2942
#2943
#2945
#2947
#2948
#2949
#2950
#2951
#2953
#2954
#2957
#2958
#2960
#2961
#2962
#2963
#2964
#2964
#2965
#2965
#2966
#2967
#2968
#297
#2970
#2971
#2972
#2975
#2976
#2976
#2977
#2979
#298
#2980
#2981
#2982
#2983
#2988
#299
#2990
#2991
#2992
#2993
#2995
#2996
#2998
#2999
#30
#3000
#3001
#3002
#3003
#3004
#3006
#3007
#3008
#3010
#3012
#3012
#3013
#3015
#3016
#3017
#3018
#302
#3021
#3022
#3024
#3027
#3028
#3029
#303
#3030
#3031
#3032
#3033
#3034
#3034
#3035
#3036
#3037
#3039
#304
#3040
#3041
#3042
#3044
#3045
#3046
#3047
#3048
#3049
#305
#3051
#3053
#3054
#3055
#3056
#3057
#3060
#3061
#3063
#3063
#3064
#3064
#3065
#3065
#3067
#3068
#3069
#3072
#3073
#3074
#3075
#3076
#3077
#3078
#3079
#3079
#308
#3080
#3081
#3083
#3084
#3085
#3086
#3089
#3091
#3092
#3093
#3094
#3095
#3096
#3098
#3099
#31
#310
#3100
#3101
#3103
#3106
#3107
#3108
#3109
#3111
#3111
#3112
#3113
#3116
#3117
#3118
#3120
#3121
#3122
#3124
#3125
#3126
#3127
#3128
#3129
#313
#3131
#3132
#3134
#3136
#3141
#3142
#3144
#3145
#3147
#3148
#3150
#3151
#3152
#3154
#3154
#3156
#3157
#3160
#3161
#3162
#3162
#3164
#3164
#3166
#3166
#3167
#317
#3170
#3171
#3172
#3174
#3176
#3179
#318
#3182
#3184
#3186
#3186
#3187
#3188
#3189
#3193
#3194
#3194
#3196
#3197
#3197
#32
#3200
#3201
#3201
#3202
#3204
#3206
#3207
#3210
#3210
#3211
#3217
#3218
#3220
#3221
#3222
#3223
#3224
#3229
#3230
#3231
#3231
#3232
#3234
#3235
#3236
#3237
#3238
#3239
#3240
#3242
#3244
#3245
#3246
#3249
#325
#3252
#3253
#3254
#3255
#3255
#3256
#3258
#3260
#3261
#3262
#3263
#3264
#3265
#3266
#3266
#3267
#3268
#3269
#327
#3270
#3271
#3273
#3274
#3276
#3276
#328
#3280
#3281
#3282
#3283
#3284
#3286
#3287
#3288
#3288
#329
#3291
#3291
#3292
#3292
#3297
#3298
#33
#3300
#3300
#3302
#3302
#3308
#3310
#3310
#3311
#3312
#3313
#3314
#3315
#3315
#3319
#3322
#3323
#3324
#3325
#3326
#3327
#334
#335
#336
#34
#340
#341
#343
#344
#348
#35
#351
#352
#353
#356
#357
#358
#359
#36
#360
#362
#363
#364
#367
#368
#37
#370
#373
#379
#384
#385
#388
#39
#393
#394
#395
#396
#4
#40
#400
#404
#406
#407
#41
#411
#412
#42
#434
#438
#44
#441
#443
#45
#453
#46
#462
#465
#47
#470
#472
#475
#477
#48
#480
#483
#485
#488
#49
#498
#5
#50
#501
#502
#51
#513
#514
#516
#519
#52
#520
#521
#522
#525
#529
#53
#534
#54
#543
#544
#545
#55
#550
#553
#557
#558
#56
#561
#562
#567
#57
#575
#578
#579
#58
#580
#581
#582
#583
#585
#586
#587
#588
#59
#590
#595
#596
#6
#60
#600
#605
#608
#609
#61
#611
#616
#617
#618
#619
#62
#621
#623
#624
#626
#63
#630
#633
#634
#635
#639
#64
#642
#643
#647
#648
#659
#66
#661
#664
#665
#666
#67
#670
#671
#678
#68
#684
#689
#698
#7
#70
#704
#708
#71
#712
#713
#715
#719
#72
#721
#723
#725
#727
#73
#733
#737
#738
#740
#741
#743
#745
#746
#748
#75
#750
#76
#761
#762
#767
#768
#770
#773
#783
#785
#789
#791
#793
#794
#795
#797
#798
#799
#8
#803
#805
#806
#809
#810
#812
#82
#820
#821
#822
#823
#829
#831
#836
#838
#84
#842
#848
#85
#851
#852
#853
#854
#858
#86
#860
#862
#867
#868
#87
#872
#88
#881
#884
#886
#889
#89
#892
#893
#898
#9
#90
#900
#901
#905
#906
#91
#910
#911
#918
#921
#93
#930
#932
#935
#94
#941
#947
#95
#950
#951
#953
#954
#957
#958
#96
#963
#964
#966
#968
#97
#971
#977
#981
#986
#989
#990
#993
#994
#999
v0.2.0
v0.2.1
v0.3.0
v0.3.1
v0.3.2
v0.4.0
v0.4.1
v0.4.2
v0.4.3
v0.5.0
v0.6.0
v0.7.0
v0.8.0
v0.8.1
v0.8.2
v0.9.0
v0.9.1
v0.9.2
v0.9.3
v0.9.4
v1.0.0
v1.0.1
v1.0.2
v1.0.3
v1.1.0
v1.1.1
v1.2.0
v1.3.0
v1.3.1
v1.3.2
v1.3.3
v1.3.4
v1.4.0
v1.4.1
v1.4.2
v1.4.3
v1.4.4
v1.4.5
v2.0.0
v2.0.1
v2.0.2
v2.0.3
v2.0.4
v2.1.0
v2.1.1
v2.2.0
v2.3.0
v2.3.1
v2.4.0
v2.4.1
v3.0.0
v3.0.1
v3.0.2
v3.1.0
v3.1.1
v3.2.0
v3.2.1
v3.2.2
v3.2.3
v3.3.0
v3.3.1
v3.3.2
v3.3.3
v3.3.4
v3.3.5
-
22d9249c4a
Fix the bug
Sadra Barikbin
2024-08-07 23:48:15 +0330 -
4dc67e4ef3
Vidya Galli
2024-08-07 13:09:51 -0700 -
9b71343328
Abhilash Majumder
2024-08-08 01:36:05 +0530 -
dea2b747d1
Vaibhav Srivastav
2024-08-07 19:50:02 +0200 -
3c9a840362
Vaibhav Srivastav
2024-08-07 15:55:10 +0200 -
76ba66b4f8
Update Quantization docs and minor doc fix.
Vaibhav Srivastav
2024-08-07 15:33:38 +0200 -
4430e123d4
add intel-cpu docker image
Wang, Yi A
2024-08-07 00:02:31 -0700 -
6abcab843d
fix in regression in ipex flashattention
Wang, Yi A
2024-08-06 22:00:36 -0700 -
30e70f2ceb
Merge branch 'main' into hot_fix_xpu
Wang, Yi A
2024-08-06 21:57:25 -0700 -
3e41ec28c7
add gptj modeling
Wang, Yi A
2024-08-01 18:26:04 -0700 -
133015f408
drbh
2024-08-06 15:25:30 -0400 -
3280d59f19
fix: prefer original layernorm names for 180B
drbh
2024-08-06 19:08:09 +0000 -
a64d407d64
drbh
2024-08-06 13:33:22 -0400 -
fecc66d736
fix: default num_ln_in_parallel_attn to one if not supplied
drbh
2024-08-06 17:21:10 +0000 -
1768c00b9f
drbh
2024-08-06 13:10:19 -0400 -
f8a5b381fe
drbh
2024-08-06 13:09:50 -0400 -
e557855558
revert mkl
Mohit Sharma
2024-08-06 12:46:35 +0000 -
d61f7e63fa
fix clone
Mohit Sharma
2024-08-06 12:39:49 +0000 -
f230da8d63
Keeping the benchmark somewhere
feature/radix-prefix-cache-bench
Daniël de Kok
2024-08-06 12:36:15 +0000 -
e11f5f1c38
drbh
2024-08-06 07:51:32 -0400 -
29b8d19cdf
drbh
2024-08-06 07:49:53 -0400 -
7865851c02
Test partially overlapping prefills.
Daniël de Kok
2024-08-06 11:45:20 +0000 -
2a255ad719
Initial radix cache tests
Daniël de Kok
2024-08-06 10:50:18 +0000 -
5788c942a5
ix issues
Mohit Sharma
2024-08-06 10:29:46 +0000 -
6486887b43
Add radix cache free, improve allocate
Daniël de Kok
2024-08-06 08:57:34 +0000 -
516b43f006
fix: return the out tensor rather then the functions return value
drbh
2024-08-05 19:06:55 +0000 -
4379f0650a
feat: add release and sha tagged images
inlcude-latest-release-on-commit-builds-tags
drbh
2024-08-05 13:13:52 -0400 -
dd47a3dac4
drbh
2024-08-05 12:36:44 -0400 -
9415b90892
First radix allocation bits
Daniël de Kok
2024-08-05 15:55:14 +0000 -
07ede8d8e5
Fix splitting
Daniël de Kok
2024-08-05 15:54:56 +0000 -
6c547a69dc
feat: include local lora adapter loading docs
drbh
2024-08-05 09:23:28 -0400 -
215ed3ad52
drbh
2024-08-05 09:11:40 -0400 -
55e6059eb1
update torch
Mohit Sharma
2024-08-05 12:47:21 +0000 -
0ad78d20a5
style
Mohit Sharma
2024-08-05 10:12:46 +0000 -
ab2ab2a0aa
pre-commit
feature/no_repeat_ngram_size_ci
erikkaum
2024-08-05 13:01:19 +0200 -
05a1d1b83a
forgot fixture
erikkaum
2024-08-05 12:49:24 +0200 -
5da696046e
Block/node eviction
Daniël de Kok
2024-08-05 09:46:37 +0000 -
82d7914761
delete release decorator
erikkaum
2024-08-05 11:15:15 +0200 -
c4258e40fe
feat: simplify prepare_chat_input logic and adjust start stop chars
drbh
2024-08-02 23:35:28 +0000 -
645a6f8068
fix: typo tweak
drbh
2024-08-02 21:48:04 +0000 -
ad942a1d79
fix: avoid changing conditional
drbh
2024-08-02 21:46:58 +0000 -
afc0fb5adf
fix: simplify changes and revert model changes
drbh
2024-08-02 19:01:58 +0000 -
cf27954257
fix: update sliding window conditional
drbh
2024-08-02 17:39:08 +0000 -
ce76f4ccc3
Update parent refcounts when inserting a child
Daniël de Kok
2024-08-02 15:02:28 +0000 -
aa1c96a7a4
Access times
Daniël de Kok
2024-08-02 14:17:56 +0000 -
9caee1f368
Parent links
Daniël de Kok
2024-08-02 13:54:38 +0000 -
590fc2c58d
Double linked data structures are still terrible in Rust.
Daniël de Kok
2024-08-02 13:50:56 +0000 -
ed83bfe0ff
Hugo Larcher
2024-08-02 15:10:15 +0200 -
690d631d68
Hugo Larcher
2024-08-02 14:54:34 +0200 -
28b8a4287d
Hugo Larcher
2024-08-02 14:44:37 +0200 -
cd4933cd5a
Trie insertion/lookup
Daniël de Kok
2024-08-02 11:05:03 +0000 -
c9916107b7
Add FlashInfer support
Daniël de Kok
2024-08-01 11:02:22 +0000 -
d34ffc4fe9
Refile the hpu warmup
yuanwu
2024-08-02 04:36:59 +0000 -
e9842ceef2
feat: return the generated text when parsing fails
drbh
2024-08-01 23:40:23 +0000 -
fe5c19d155
Max de Bayser
2024-08-01 15:53:32 -0300 -
5b649d67c4
fix: improve condtional and error message
drbh
2024-08-01 16:17:29 +0000 -
060b2db0df
add 'mamba' as model config
fix/parse-mamba-config
erikkaum
2024-08-01 18:16:32 +0200 -
82240bf44c
Erik Kaunismäki
2024-08-01 17:38:09 +0200 -
cae28dcbf1
fix: prefer version check over test op and avoid window_size_left if not flash attn2
drbh
2024-08-01 15:06:09 +0000 -
743d37812d
WIP
Daniël de Kok
2024-08-01 15:05:51 +0000 -
47447ef017
Daniël de Kok
2024-08-01 17:03:28 +0200 -
5d482d4da2
Port over block allocator interface (with token ids)
Daniël de Kok
2024-08-01 13:41:07 +0000 -
4562c16048
Use a block size of 1 for FlashInfer
Daniël de Kok
2024-08-01 11:20:42 +0000 -
8fb8e1da78
Add FlashInfer support
Daniël de Kok
2024-08-01 11:02:22 +0000 -
fe41e13b45
Unify attention output handling
Daniël de Kok
2024-07-31 14:53:58 +0000 -
22fb1be588
Daniël de Kok
2024-08-01 15:38:57 +0200 -
f484bcb552
Also run CI on changes to
backends
Daniël de Kok
2024-08-01 13:01:01 +0000 -
278697cf55
Fix cache block size for flash decoding
Daniël de Kok
2024-08-01 12:34:34 +0000 -
0ba10078e8
pre-commit again
erikkaum
2024-08-01 13:57:49 +0200 -
f5a6691d0e
pre-commit
erikkaum
2024-08-01 13:37:45 +0200 -
dab00af971
fix: fix num_ln_in_parallel_attn attribute name typo in RWConfig
Islam Almersawi
2024-08-01 14:35:00 +0400 -
98e790e32a
add param in healthcheck
erikkaum
2024-08-01 12:10:10 +0200 -
7186ab8e8e
draft of unit integration test
erikkaum
2024-07-31 17:29:45 +0200 -
54b45be38d
add missed commit
Nathan Brake
2024-07-23 12:44:50 -0400 -
d8d3c4678e
run update docs
erikkaum
2024-07-25 18:03:21 +0200 -
72cade84f9
fix pre-commit checks
erikkaum
2024-07-25 18:01:52 +0200 -
f6324ffb3a
delete the last no repeat processor from warpers
erikkaum
2024-07-25 17:31:04 +0200 -
6353e2417b
satisfy compiler
erikkaum
2024-07-18 18:04:00 +0200 -
d0eef2b552
make nrns optional
Nathan Brake
2024-07-15 13:58:34 +0000 -
10b940559a
update docs
Nathan Brake
2024-07-15 13:55:43 +0000 -
ea915ad7d7
Add support for no_repeat_ngram_size
Nathan Brake
2024-07-15 13:51:11 +0000 -
9ab9937414
Wang, Yi
2024-08-01 17:08:36 +0800 -
2c288866a7
Unify attention output handling
Daniël de Kok
2024-07-31 14:53:58 +0000 -
c3e874aaf5
fix EleutherAI/gpt-neox-20b does not work in tgi
Wang, Yi A
2024-07-31 22:29:12 -0700 -
e4a0bf3b71
fix: allocate tmp based on sgmv kernel if available
drbh
2024-07-31 22:15:52 +0000 -
acb41a5e6f
(chore) fmt ... why?
Morgan Funtowicz
2024-07-31 20:38:30 +0000 -
40658f4e84
fix: adjust return type
drbh
2024-07-30 13:50:10 +0000 -
26b954dfd3
feat: improve to tokenize too
drbh
2024-07-30 13:48:13 +0000 -
62d7be3727
feat: implement a templated endpoint for visibility into chat requests
drbh
2024-07-30 13:06:52 +0000 -
ca8ad2dbee
feat: prefer stop over eos_token to align with openai finish_reason
drbh
2024-07-31 15:34:48 +0000 -
7451041ecd
Erik Kaunismäki
2024-07-31 16:29:07 +0200 -
f7f61876cf
drbh
2024-07-31 10:27:15 -0400 -
290e7bd173
delete option around usage stats arg
erikkaum
2024-07-31 14:51:10 +0200 -
e51171a18d
Erik Kaunismäki
2024-07-31 14:41:10 +0200 -
6df2557910
(launcher) default new server::run parameters to false for now
Morgan Funtowicz
2024-07-31 09:06:52 +0000 -
81682561bd
(docker) build ompi with SLURM support
Morgan Funtowicz
2024-07-31 09:06:24 +0000 -
4ff17caaed
(docker) let's put rust in the TRTLLM folder when building
Morgan Funtowicz
2024-07-31 09:06:11 +0000 -
4c1e234266
(backend) use parking_lot crate for RwLock fairness
Morgan Funtowicz
2024-07-31 12:30:53 +0000 -
f476b0cc34
fix pre-commit
erikkaum
2024-07-31 13:24:52 +0200 -
34f7dcfd80
GPTQMarlinWeightLoader
(#2300)Daniël de Kok
2024-07-31 13:08:41 +0200