Commit Graph

  • 2e067fabd3 fix: clippy Hugo Larcher 2025-01-27 14:37:57 +0100
  • 57570bf598 Fixing the oom maybe with 2.5.1 change. Nicolas Patry 2025-01-27 14:35:00 +0100
  • a8ba2542d8 fix: update ping delay and update doc. Hugo Larcher 2025-01-27 14:12:03 +0100
  • 19bb3bf355 fix: simplify error handling Hugo Larcher 2025-01-27 14:04:33 +0100
  • db922eb77e Update to attention-kernels 0.2.0 (#2950) Daniël de Kok 2025-01-27 11:42:36 +0100
  • 40b00275b2 Attempt to remove AWS S3 flaky cache for sccache (#2953) Funtowicz Morgan 2025-01-27 11:21:48 +0100
  • 5a317ffad7 backend(trtllm): inject ompi_version build arg in dependent step Morgan Funtowicz 2025-01-26 15:21:38 +0100
  • c632f8a95a backend(trtllm): Cache mode max to cache intermediate layers Morgan Funtowicz 2025-01-26 11:38:00 +0100
  • cad4644537 backend(trtllm): export env variable in run mb? Morgan Funtowicz 2025-01-25 08:00:07 +0100
  • cb1dab12c1 backend(trtllm): ok let's try to define the launchers in build.rs when rustc_wrapper is present Morgan Funtowicz 2025-01-25 01:31:13 +0100
  • e7064c95da backend(trtllm): make sccache definition manually Morgan Funtowicz 2025-01-24 22:01:43 +0100
  • a434c2ffc9 backend(trtllm): relax the way to detect sccache Morgan Funtowicz 2025-01-24 19:36:28 +0100
  • 4c8bf7f5b8 fix: add telemetry regular pings and fix unhandled errors avoid not sending telemetry stop events. Hugo Larcher 2025-01-24 18:10:12 +0100
  • cb452ae7e8 backend(trtllm): and with the right env var for gha sccache Morgan Funtowicz 2025-01-24 17:50:06 +0100
  • a8a9168065 backend(trtllm): what if we expose ENV instead of inline? Morgan Funtowicz 2025-01-24 17:47:49 +0100
  • 556a61d143 backend(trtllm): attempt to remove AWS S3 flaky cache for sccache Morgan Funtowicz 2025-01-24 15:50:28 +0100
  • bafbd06744 Update transformers_flash_causal_lm.py fix-tp Cyril Vallez 2025-01-24 15:06:50 +0100
  • de83178bc3 tp monkey patch Cyril Vallez 2025-01-24 15:03:14 +0100
  • 2024f54d71 feat: Make streaming for tool calling behave the same as the open ai api Nicolas Casademont 2025-01-24 14:42:25 +0100
  • 6cb41a80a1 Revert "Remove AWS credentials?" Nicolas Patry 2025-01-24 14:34:17 +0100
  • d2ff68e98d Remove AWS credentials? Nicolas Patry 2025-01-24 12:18:28 +0100
  • b70f29d729 Bypasse perm issue. v3.0.2 git_v3.0.2 Nicolas Patry 2025-01-24 12:12:47 +0100
  • 9157833662 Update to attention-kernels 0.2.0 Daniël de Kok 2025-01-23 12:49:07 +0000
  • 4b8d09d63e fix: Adapt function call response to return a json string for arguments Nicolas Casademont 2025-01-24 11:47:01 +0100
  • e413b01eb1 Create patch release. Nicolas Patry 2025-01-24 10:50:15 +0100
  • 02e4b9ab32 backend(vllm): plug in the tokio server and CLI Morgan Funtowicz 2025-01-24 10:41:07 +0100
  • 2bf3ea8517 add local file read path for image which could work with dataset like Lin-Chen/ShareGPT4V Wang, Yi A 2025-01-22 23:01:39 -0800
  • f709466767 Make tool_call a list for streaming case datta0 2025-01-24 09:09:40 +0000
  • 3495248d87 Fix tool call response to adhere to OpenAI spec datta0 2025-01-24 07:22:11 +0000
  • d9dda11726 Trying to put back the archlist (to fix the oom). (#2947) Nicolas Patry 2025-01-24 09:32:17 +0100
  • 0dd8a96613 Trying to put back the archlist (to fix the oom). Nicolas Patry 2025-01-24 00:46:47 +0100
  • d937eb64da Fixing cargo lock. Nicolas Patry 2025-01-23 18:54:34 +0100
  • 18c4607d46 Transformers backend TP fix (#2945) Cyril Vallez 2025-01-23 18:09:57 +0100
  • bcd9d3a5cb cohere fix Cyril Vallez 2025-01-23 12:49:30 +0000
  • f4dc44b88c init dispatch Cyril Vallez 2025-01-23 12:40:56 +0000
  • 29a0893b67 Tmp tp transformers (#2942) Nicolas Patry 2025-01-23 18:07:30 +0100
  • fe7594e369 Fix the warmup issue of prefill batch_size (#268) Yuan Wu 2025-01-24 00:26:17 +0800
  • 0a89902663 [TRTLLM] Expose finish reason (#2841) Funtowicz Morgan 2025-01-23 16:48:26 +0100
  • 0f2845081b Remove the archlist, it's defined in the docker anyway. Nicolas Patry 2025-01-23 14:31:35 +0100
  • 4e172028aa Add NVIDIA A40 to known cards (#2941) Nikolai Kolodziej 2025-01-23 14:19:21 +0100
  • 6ab02931cf Set alias for max_completion_tokens in ChatRequest (#2932) Alvaro Bartolome 2025-01-23 14:18:47 +0100
  • 0c879ff318 misc(backend): update deps Morgan Funtowicz 2025-01-21 14:17:58 +0100
  • 4c6ee944d0 misc(llamacpp): fix typo Morgan Funtowicz 2024-12-13 17:13:29 +0100
  • 0da255ecbc feat(trtllm): expose finish reason to Rust Morgan Funtowicz 2024-12-10 16:51:22 +0100
  • cc212154e0 Bump TensorRT-LLM backend dependency to v0.16.0 (#2931) Funtowicz Morgan 2025-01-23 13:54:40 +0100
  • f331091ba3 Using 2.5 kernels. Nicolas Patry 2025-01-23 12:14:46 +0100
  • 980fb92529 backend(trtllm): add correctly untar it Morgan Funtowicz 2025-01-23 12:00:15 +0100
  • 6fd50ff3ba backend(trtllm): make sure we are using correct path for openmpi ADD in dockerfile Morgan Funtowicz 2025-01-23 11:26:20 +0100
  • 83c1ea8f7d Building AOT. Nicolas Patry 2025-01-23 10:49:22 +0100
  • bd2ec03d53 backend(vllm): statically allocate LLMEngine Morgan Funtowicz 2025-01-22 22:15:33 +0100
  • 1dd346666a Clarify FP8-Marlin use on capability 8.9 (#2940) Daniël de Kok 2025-01-22 18:18:11 +0100
  • 5f17b51a9c Revert the flashinfer (this will fails). Nicolas Patry 2025-01-22 18:16:54 +0100
  • 8d05d6a62c Put back the attention impl. Nicolas Patry 2025-01-22 18:13:59 +0100
  • 6fe37d61d0 Fixing the transformers backend. Nicolas Patry 2025-01-22 17:47:20 +0100
  • 859d2f0464 Tmp branch to test transformers backend with 2.5.1 and TP>1 Nicolas Patry 2025-01-22 17:33:08 +0100
  • 61a0b95f63 feat: add NVIDIA A40 to known cards Nikolai Kolodziej 2025-01-22 17:07:30 +0100
  • f187e993b9 Clarify FP8-Marlin use on capability 8.9 Daniël de Kok 2025-01-22 15:50:40 +0000
  • 1d3c9beba8 fix moe in quantization path (#2935) Wang, Yi 2025-01-22 21:36:15 +0800
  • 6d335ca7ce Remove modifications in Lock. new_minor_version Nicolas Patry 2025-01-22 13:37:17 +0100
  • b21d3c1e73 Upgrade the version number. Nicolas Patry 2025-01-22 12:29:50 +0100
  • 2dfe3b3ee6 Upgrading the deps to have transformers==4.48.0 necessary (#2937) Nicolas Patry 2025-01-22 12:20:15 +0100
  • 0736c8c8b9 Upgrading the deps to have transformers==4.48.0 necessary Nicolas Patry 2025-01-22 12:09:28 +0100
  • fd88b1d6b9 llava next image encoder to allow un-aligned patch / image sizes Jiayu Liu 2025-01-22 17:09:59 +0800
  • fd0d628a59 fix moe in quantization path update ipex xpu to support moe for mixtral Wang, Yi A 2025-01-21 23:34:44 -0800
  • a7e5179f10 backend(trtllm): attempt to use ADD instead of RUN for openmpi Morgan Funtowicz 2025-01-21 23:40:45 +0100
  • cfd22726c9 backend(vllm): initial commit Morgan Funtowicz 2025-01-21 23:37:56 +0100
  • e958cab0c1 Merge branch 'main' into fix-max-completion-tokens Alvaro Bartolome 2025-01-21 17:58:28 +0100
  • 64a33c1f05 Run pre-commit run --all-files to fix CI (#2933) Alvaro Bartolome 2025-01-21 17:33:33 +0100
  • 5836fee2d0 Run pre-commit run --all-files to fix CI Alvaro Bartolome 2025-01-21 17:02:11 +0100
  • dbadea98a2 Set alias for max_completion_tokens in ChatRequest Alvaro Bartolome 2025-01-21 16:39:56 +0100
  • ebfe9d9f50 backend(trtllm): reenable shallow clone Morgan Funtowicz 2025-01-21 15:23:25 +0100
  • 7c1c587b38 backend(trtllm): move to nvidia remote instead of hf Morgan Funtowicz 2025-01-21 15:18:59 +0100
  • 10f713bcb6 backend(trtllm): use tag instead Morgan Funtowicz 2025-01-21 15:13:01 +0100
  • 9eff8dd33b backend(trtllm): do not use shallow clone Morgan Funtowicz 2025-01-21 14:59:30 +0100
  • dc564aa022 backend(trtllm): update to 0.16.0 Morgan Funtowicz 2025-01-21 14:46:21 +0100
  • bdb3e488e4 Trying to avoid the random timeout. (#2929) Nicolas Patry 2025-01-21 11:06:10 +0100
  • 17367438f3 Give TensorRT-LLMa proper CI/CD 😍 (#2886) Funtowicz Morgan 2025-01-21 10:19:16 +0100
  • 63c64bb307 Use the default value in globals.py (#265) Yuan Wu 2025-01-21 17:10:23 +0800
  • 8de110ae9f Fix warmup with SKIP_TOKENIZER_IN_TGI=true (#266) Karol Damaszke 2025-01-21 10:09:49 +0100
  • 7d106477d6 Fix router input validation for SKIP_TOKENIZER_IN_TGI=true (#267) Yuan Wu 2025-01-21 17:08:53 +0800
  • b980848abf Flash Transformers modeling backend support (#2913) Cyril Vallez 2025-01-21 10:01:51 +0100
  • a0e75b1311 misc(ci): attempt to fix sccache not building trtllm again Morgan Funtowicz 2025-01-21 00:19:39 +0100
  • a4d069fe07 misc(ci): attempt to fix sccache not building trtllm Morgan Funtowicz 2025-01-20 23:12:45 +0100
  • edfafeb46c misc(ci): fix warnings Morgan Funtowicz 2025-01-20 22:44:31 +0100
  • d0b8e2eb25 misc(ci): give everything aws needs Morgan Funtowicz 2025-01-20 21:12:24 +0100
  • 0d9ec75f27 oupsi Cyril Vallez 2025-01-20 18:42:12 +0100
  • 70ada578b9 check for non-native models Cyril Vallez 2025-01-20 18:01:12 +0100
  • 374493f830 Wat? Nicolas Patry 2025-01-20 17:57:58 +0100
  • 93e343e11d Remove the dummy test, only increase the read timeout. Nicolas Patry 2025-01-20 17:40:58 +0100
  • 1ee74e5512 Remove legacy ENV directive. Nicolas Patry 2025-01-20 17:33:53 +0100
  • 4cc842e556 Longer timeout ? Nicolas Patry 2025-01-20 17:26:58 +0100
  • 9c955105d8 More read timeout ? Nicolas Patry 2025-01-20 16:47:35 +0100
  • 2ef3002c2b Update __init__.py Cyril Vallez 2025-01-20 16:37:41 +0100
  • c9f9cd165b Trying to avoid the random timeout. Nicolas Patry 2025-01-20 16:27:39 +0100
  • 6d9c011f51 move the import to avoid device issue Cyril Vallez 2025-01-20 16:11:41 +0100
  • 9af3ea4b70 device check Cyril Vallez 2025-01-20 15:55:31 +0100
  • 52afdcc281 update comment Cyril Vallez 2025-01-20 15:25:10 +0100
  • 6e0f37c0ca revert + style + minor improvements Cyril Vallez 2025-01-20 15:13:24 +0100
  • 7c9ee5655f misc(ci): give everything aws needs Morgan Funtowicz 2025-01-20 15:10:14 +0100
  • 16162602c2 Add fp8 support moe models Mohit Sharma 2025-01-20 13:55:54 +0000