Commit Graph

  • 653193a942 Improve support for GPUs with capability < 8 (#2575) Daniël de Kok 2024-09-27 16:19:42 +0200
  • bc28f86903 Fix build with --features google (#2566) Alvaro Bartolome 2024-09-26 11:41:38 +0200
  • 6976cf8c4c Add LoRA adapters support for Gemma2 (#2567) Alvaro Bartolome 2024-09-26 10:54:08 +0200
  • 0817643b58 remove LORA_ADAPTERS_PATH (#2563) Nicholas Broad 2024-09-24 16:20:15 -0700
  • a684a81927 More tensor cores. (#2558) Nicolas Patry 2024-09-24 23:57:26 +0200
  • 97d4bdd685 Cleanup Vertex + Chat (#2553) Nicolas Patry 2024-09-24 23:37:17 +0200
  • 25e0edf337 Hotfixing main. (#2562) Nicolas Patry 2024-09-24 23:00:43 +0200
  • 782130df17 Adding note for private models in quick-tour document (#2548) Aritra Roy Gosthipaty 2024-09-24 18:36:53 +0530
  • 5247f8938d Simplify crossterm imports (#2545) Orhun Parmaksız 2024-09-24 15:57:20 +0300
  • 8c6d3e074f Update the link to the Ratatui organization (#2546) Orhun Parmaksız 2024-09-24 15:51:48 +0300
  • d4f995e718 Add DenseMoELayer and wire it up in Mixtral/Deepseek V2 (#2537) Daniël de Kok 2024-09-24 14:27:06 +0200
  • 32d50c2ea7 Add support for scalar FP8 weight scales (#2550) Daniël de Kok 2024-09-24 13:57:40 +0200
  • 55115ed700 Skip the test let's see if it's always the first tests that fails. Nicolas Patry 2024-10-25 11:00:29 +0200
  • ba5fc7d922 Add support for stop words in TRTLLM (#2678) Funtowicz Morgan 2024-10-25 10:58:34 +0200
  • 68cfc94f40 Hotfixing main (#2556) Nicolas Patry 2024-09-24 11:51:14 +0200
  • 79ac2b741d Micro cleanup. (#2555) Nicolas Patry 2024-09-24 11:19:24 +0200
  • 73e6090d53 chore: Add old V2 backend (#2551) OlivierDehaene 2024-09-24 08:38:17 +0200
  • 9aed9d5f81 nix: remove unused _server.nix file (#2538) Daniël de Kok 2024-09-23 09:43:23 +0200
  • b590310255 Add missing import package yuanwu 2024-10-25 08:52:24 +0000
  • 79690a0d65 Update for new API Nicolas Patry 2024-10-25 10:46:05 +0200
  • a7465ba67d fix kernel OlivierDehaene 2024-10-25 10:37:10 +0200
  • 347f3f51da fix kernel OlivierDehaene 2024-10-24 19:17:31 +0200
  • d1e95ceaff cast to int32 OlivierDehaene 2024-10-24 19:01:40 +0200
  • ea66379e3c feat: add triton kernels to decrease latency of large batches OlivierDehaene 2024-10-24 16:48:46 +0200
  • 8ebe77b3be Simplify the warmup yuanwu 2024-10-24 06:26:48 +0000
  • 84f14a1437 feat(trtllm): detect stop_words from generation_config.json Morgan Funtowicz 2024-10-23 16:05:59 +0200
  • 13a68e223a chore(docker): install transformers Morgan Funtowicz 2024-10-23 15:48:24 +0200
  • 381262337a chore(docker): add mpi to ld_library_path Morgan Funtowicz 2024-10-23 15:48:17 +0200
  • e4f67f70a2 feat(docker): add python3.10 dev to runtime deps Morgan Funtowicz 2024-10-22 23:05:55 +0200
  • 17573d42d8 feat(docker): build with-slurm ompi Morgan Funtowicz 2024-10-22 23:05:45 +0200
  • 50a19aa326 chore(router): minor refactorings Morgan Funtowicz 2024-10-22 23:05:10 +0200
  • b939a0f7d7 chore(trtllm): minor fix Morgan Funtowicz 2024-10-21 23:50:02 +0200
  • cdba16fd23 chore(trtllm): ensure max throughput scheduling policy is selected Morgan Funtowicz 2024-10-21 23:40:54 +0200
  • d659cb0113 chore(trtllm): validate there are enough GPus on the system for the desired model Morgan Funtowicz 2024-10-21 23:40:38 +0200
  • c2bb199fb1 chore(trtllm): minor refactoring Morgan Funtowicz 2024-10-21 23:40:20 +0200
  • 703c26eca7 chore(trtllm): use GetParallelConfig Morgan Funtowicz 2024-10-21 23:39:44 +0200
  • c90680ed30 chore(trtllm): define a macro for SizeType cast Morgan Funtowicz 2024-10-21 23:39:08 +0200
  • 16bb4b670b chore(trtllm): create specific parallelconfig factory and logging init methods Morgan Funtowicz 2024-10-21 23:38:42 +0200
  • 171a5638b1 feat(trtllm): add stop words handling Morgan Funtowicz 2024-10-21 17:00:45 +0200
  • e711947e3e chore(ffi):formatting Morgan Funtowicz 2024-10-21 16:59:30 +0200
  • 17073267c0 feat(post_processing): max_new_tokens is const evaluated now Morgan Funtowicz 2024-10-21 16:57:46 +0200
  • 3af45189b3 chore(looper): cleanup a bit more Morgan Funtowicz 2024-10-21 16:57:26 +0200
  • 7f383bf4dc feat(trtllm): rewrite health to not account for current state Morgan Funtowicz 2024-10-21 15:55:38 +0200
  • c3fb2ecdc0 Merge branch 'main' into auto_length Nicolas Patry 2024-10-25 10:20:00 +0200
  • 123ff3a83e Fixing bad rebase. Nicolas Patry 2024-10-25 09:58:46 +0200
  • 0bd9171556 Avoiding timeout for bloom tests. Nicolas Patry 2024-10-25 09:48:57 +0200
  • db68bd0524 Fixing mt0 test. (#2692) Nicolas Patry 2024-10-25 09:46:39 +0200
  • f16121002c Fixing mt0 test. Nicolas Patry 2024-10-25 09:34:15 +0200
  • cece8635f8 Fixing rocm gptq by using triton code too (renamed cuda into triton). (#2691) Nicolas Patry 2024-10-25 09:17:57 +0200
  • 7dc2adf7e9 Fixing rocm gptq by using triton code too (renamed cuda into triton). Nicolas Patry 2024-10-25 07:26:33 +0200
  • bbbd9a6dd2 Deprecation message. Nicolas Patry 2024-10-16 18:50:33 +0200
  • d4d4321814 Lint. Nicolas Patry 2024-10-16 15:07:01 +0200
  • b07935b04f Ellide lifetime. Nicolas Patry 2024-09-25 21:38:31 +0200
  • f20ef614bd Adding the legacy handle. Nicolas Patry 2024-09-25 14:37:36 +0200
  • cd355d08a9 Fixing mamba by using the transformers version. Nicolas Patry 2024-09-25 03:37:12 +0200
  • 9d7a95b24b Fixing the GIL locking. Nicolas Patry 2024-09-25 01:18:05 +0200
  • c0151cc14a Flake.lock update ? Nicolas Patry 2024-09-24 16:22:17 +0200
  • 5bc1fe84eb Fixing the tests. Nicolas Patry 2024-09-24 15:45:10 +0200
  • b89b9fd016 Remove redundancy. Nicolas Patry 2024-09-17 17:10:30 +0200
  • 9d702bcde3 Handling potential lack of offsets (python tokenizer) Nicolas Patry 2024-09-17 16:56:19 +0200
  • 5ba7805f1c We can have a tokenizer anywhere. Nicolas Patry 2024-09-17 16:16:51 +0200
  • 43df056eee [TENSORRT-LLM] - Implement new looper thread based backend (#2357) Funtowicz Morgan 2024-10-25 07:17:14 +0200
  • 4463856cc7 Fix bad rebase Nicolas Patry 2024-10-25 07:14:41 +0200
  • b4b6322ede Lint. Nicolas Patry 2024-10-25 07:10:34 +0200
  • 01b82b58d2 Merge branch 'main' into trtllm-executor-thread Nicolas Patry 2024-10-25 07:06:35 +0200
  • 84b4a49093 Upgrade outlines to 0.1.1 Alex Weston 2024-10-16 13:58:54 -0400
  • ed87b464b4 Fixing "deadlock" when python prompts for trust_remote_code by always (#2664) Nicolas Patry 2024-10-25 06:39:21 +0200
  • c6281a4893 Switch from fbgemm-gpu w8a8 scaled matmul to vLLM/marlin-kernels Daniël de Kok 2024-10-24 15:29:33 +0000
  • eab07f746c Add support for FP8 KV cache scales (#2628) Daniël de Kok 2024-10-24 16:36:18 +0200
  • f311643fff fix: improve find_segments via numpy diff drbh 2024-10-24 10:16:34 -0400
  • 14a0df3a38 Fix Phi 3.5 MoE tests (#2684) Daniël de Kok 2024-10-24 15:21:50 +0200
  • 1b914f37e7 flashinfer: reminder to remove contiguous call in the future (#2685) Daniël de Kok 2024-10-24 14:59:56 +0200
  • 996413b8b0 flashinfer: reminder to remove contiguous call in the future Daniël de Kok 2024-10-24 12:42:52 +0000
  • a68fae05e9 can_scale: check that the attention is flashinfer Daniël de Kok 2024-10-24 12:35:30 +0000
  • 9bbbe47c82 Fix Phi 3.5 MoE tests Daniël de Kok 2024-10-24 12:06:21 +0000
  • e3db525917 Fix integration mt0 (transformers update). auto_length Nicolas Patry 2024-10-24 11:54:11 +0200
  • 199973cc3c Simple updates. Nicolas Patry 2024-10-24 11:39:02 +0200
  • 1f18cb6aa6 Update FP8 KV cache test to use checkpoint with scales Daniël de Kok 2024-10-21 11:18:52 +0000
  • ba4ac96399 Add support for FP8 KV cache scales Daniël de Kok 2024-10-09 11:50:21 +0000
  • cacaba64c3 Revert doc text. Nicolas Patry 2024-10-24 10:06:59 +0200
  • 6994fa12f8 Updating logic + non flash. Nicolas Patry 2024-10-24 09:58:05 +0200
  • 10534511ea Much simpler logic after the overhead. Nicolas Patry 2024-10-24 06:55:25 +0200
  • 9cee00eec3 feat(trtllm): detect stop_words from generation_config.json Morgan Funtowicz 2024-10-23 16:05:59 +0200
  • 6376fecc6c chore(docker): install transformers Morgan Funtowicz 2024-10-23 15:48:24 +0200
  • ef0031182e chore(docker): add mpi to ld_library_path Morgan Funtowicz 2024-10-23 15:48:17 +0200
  • 41c2623735 feat: allow any supported payload on /invocations (#2683) OlivierDehaene 2024-10-23 13:26:01 +0200
  • 27ff1871b5 hotfix: fix flashllama OlivierDehaene 2024-10-23 13:22:31 +0200
  • 2c8a51a474 update doc OlivierDehaene 2024-10-23 12:20:20 +0200
  • 03c9388bf7 feat: natively support Granite models (#2682) OlivierDehaene 2024-10-23 12:04:05 +0200
  • 25b97fff49 Update doc OlivierDehaene 2024-10-23 12:03:46 +0200
  • 849d8821ab QuantLinear is rocm compatible. Nicolas Patry 2024-10-23 18:02:50 +0800
  • 70483428ee update openAPI OlivierDehaene 2024-10-23 11:59:41 +0200
  • 09dfff62ff feat: allow any supported payload on /invocations OlivierDehaene 2024-10-23 11:51:13 +0200
  • 82a6cb82e1 fix. Nicolas Patry 2024-10-23 17:26:18 +0800
  • 9897edb842 feat: natively support Granite models OlivierDehaene 2024-10-23 11:08:09 +0200
  • f58eb70ebf Make moe-kernels and marlin-kernels mandatory in CUDA installs (#2632) Daniël de Kok 2024-10-23 11:07:31 +0200
  • b126bf4785 Revert pr 235 as flash attention is not really enabled for gemma (#239) Thanaji Rao Thakkalapelli 2024-10-23 01:58:57 -0700
  • 8686a0fc6d Merge branch 'habana-main' into 2.3.0 yuanwu2017 2024-10-23 16:32:12 +0800
  • 67ee45a270 Pass the max_batch_total_tokens to causal_lm yuanwu 2024-10-10 07:31:50 +0000
  • 5c3efbc763 Attempt #2 Nicolas Patry 2024-10-23 15:23:39 +0800