text-generation-inference

huggingface/text-generation-inference

Fork 0

mirror of https://github.com/huggingface/text-generation-inference.git synced 2025-09-11 20:34:54 +00:00

Commit Graph

Select branches

Hide Pull Requests

20250708-ci-fixes

add-chat-response-format

add-google-cloud-provider

add-quickstart-script

add-rotary-embed-tests

add-small-ttft-script

add-test-for-warmup-and-kvcache

add_L4

add_api_key

add_batch_dimension

add_chunked_atn

add_chunked_attn

add_deepseekv3

add_gptq_docs

add_integration_test

add_readme_dashboard

add_tunable_prefill

add_vlm_chunking

adding_docs

adjust-mllama-test-output

adjust-where-request-max-tokens-is-defaulted

aiter_kernels

amd-ci-fx

auto_length

automodel-supports-flash-paged-attention

avoid-cuda-graph-during-warmup-if-oom

avoid-zero-seed

backends/trtllm

backends/trtllm-executor

baichuan2-13b

bnb4

bugfix/add_tools_prompt

bugfix/moe-kernels-imports

bugfix/phi-exl2

bump-client-0.6.2

bump-kernel-versions

bump-poetry-and-requirements

chunked_attn_l4

ci-amihalik-update-chat-completion-messages

ci-new-cluster

ci-patch

ci-run-openai-function-calling-compatible-support

ci-update_xpu_image

ci-xpu

ci-xpu2

ci2

ci_amd

ci_amd2

ci_amd3

ci_amd4

close_dl_thread

compat_logger

cuda_ipc_allreduce

debug-gpt2

debug-request-id

debug-torch-23

debug/gemma2

debugging-timeouts

deploy/aml

dev

development-guide

dummy

enable-non-grammar-constrained-tools

enable-qwen2vl-video

enable-transformers-vlm

enable_non_divisible_embeddings

exl2

experiment/moe

explore-static-triton-kernels

explore-t4-gemma-issues

feat-backend-llamacpp

feat/add-load-test

feat/attention_sinks

feat/backend_abstraction

feat/backend_feature

feat/better_tokens

feat/cuda_12

feat/flash_decoding

feat/improve_max_tokens

feat/max_queue_size

feat/page_re_alloc

feat/parse_logs

feat/support_deepspeed

feature/machete

feature/moe-kernels

feature/no_repeat_ngram_size

feature/no_repeat_ngram_size_ci

feature/phi-3-small

feature/prefix

feature/radix-prefix-cache

feature/radix-prefix-cache-bench

feature/vlm-prefix-caching

fix-cudagraph-bug

fix-gemma-tokenization

fix-grammar-cleanup-bug

fix-grammar-fsm-batching

fix-mixtral-adapter-loading

fix-release-tests

fix-repack-for-marlin

fix-tool-call-def

fix-tp

fix-version-install

fix/allow-top-p-0

fix/avoid_record_streams

fix/op-trace-id

fix/parse-mamba-config

fix_default_arg

fix_exl2

fix_fp8_llama3.2

fix_leak

fix_mistral2

fix_neox_rotary_emb

fix_phi3

flashinfer

flashinfer-0.2.5

fp8_kvcache

fp8_rocm

gaudi_llama4_tmp

gha_sccache_use_secrets

git_2.0.4

git_3.1.1

git_3.2.0

git_3.2.1

git_3.3.0

git_3.3.1

git_3.3.2

git_v2.1.0

git_v2.1.1

git_v2.2.0

git_v2.3.0

git_v2.3.1

git_v2.4.0

git_v2.4.1

git_v3.0.0

git_v3.0.1

git_v3.0.2

git_v3.1.0

git_v3.2.2

git_v3.2.3

git_v3.3.3

git_v3.3.4

git_v3.3.5

improve-docs

improve-dynamic-message-content

improve-json-schema-field

improve-tool-call-and-response-ids

improve_defaults

improve_launcher_defaults

inlcude-latest-release-on-commit-builds-tags

ipex-moe

kvrouter

kvrouter-endpoints

llama-fused-compiled-mlp

main

maintenance/docker-network

maintenance/merge-vlm-input-prep

mamba2

martinigoyanes-fix-frequency-penalty

medusa

megatron

message-more-info

mi300-temp

mllama

model_compat_log

more_logs

multi-lora

new_minor_version

nix/cargo-clippy

nix/docker2

nix/pytorch-2.5.1

nix_integration_tests

nix_test2

no_root_user

no_root_user2

op-compilation-benchmarking

origin/slind_window_fix

osanseviero-patch-1

patch_version_3.3.6

pip-installable

pr-1869-ci-run

pr-2076-ci-run

pr-2290-ci-runner

pr-2366-ci-branch

pr-2444-ci-branch

pr-2517-ci-branch

pr-2711-ci-branch

pr-2784-ci-branch

pr-2840-ci-branch

pr-2954-ci-branch

pr-3002-ci-branch

pr-3004-ci-branch

pr-3018-ci-branch

precompile-kernels-workflow

prefix_chunk

prefix_default

proxy_sse_engine_state

quantization

quantization-0.1

refactor-lora-linear

release-3.2.4

remove_post_load_weights

response-header-metrics

revert

rocm-ci-build

rocm_6.2_fixes

router-grammar-compile

s3-cache

self-generating-docs

set-num-blocks

simpler_exllama

skip-mistral-test

speculative

streaming_conceptual

support-granite-vision

support-logit-bias-in-chat

support-phi-model

support-phi3-small

support-pre-compile-kernels

temp_work

test-batch-speedup-amount

test_docs

test_rocm

tmp_invariants

tmp_medusa

tmp_torch_compile

transformers-ci

triton_fix

trtllm-stop-words

trtllm/executor_stats

tuna

update-jsonschema

update_docs2

update_internal_version

update_peft

update_readme

upgrade-outlines

upgrade_mlp_speculator

use_g6

use_updated_kernels

vllm/setup

zstd

#1

#100

#101

#1010

#1018

#1019

#102

#1022

#1023

#1024

#103

#1033

#1034

#1042

#1044

#1045

#1048

#1049

#1052

#1054

#1058

#1059

#106

#1060

#1061

#1063

#1064

#1065

#1066

#1068

#107

#1070

#1071

#1075

#1076

#1077

#108

#1080

#1081

#1089

#109

#1090

#1091

#1092

#1094

#1096

#1097

#1099

#11

#110

#1100

#1101

#1102

#1103

#1105

#1110

#1112

#1116

#1123

#1128

#1134

#114

#1140

#1141

#115

#1153

#1155

#116

#1165

#1165

#117

#1173

#1176

#1178

#1179

#118

#1182

#1183

#1184

#1187

#119

#1198

#1202

#1211

#1214

#1219

#122

#1224

#1228

#123

#1239

#1241

#1242

#1243

#1246

#1252

#126

#1260

#1267

#1270

#1272

#1274

#1276

#1279

#128

#1285

#1287

#129

#1294

#1295

#13

#130

#1301

#1305

#1307

#1308

#1313

#132

#1326

#1328

#133

#1336

#1337

#134

#1341

#1343

#1346

#1347

#1348

#135

#1351

#1352

#1353

#1358

#136

#1361

#1364

#137

#1370

#1373

#138

#1381

#1386

#139

#1390

#1395

#14

#140

#1408

#141

#1414

#1419

#142

#1420

#1424

#1425

#1427

#1428

#143

#1436

#144

#1442

#1448

#145

#1450

#1453

#1454

#1455

#1459

#1461

#1462

#1463

#1469

#147

#1470

#1471

#1473

#1475

#1476

#1477

#1478

#148

#1480

#1484

#1486

#1488

#1489

#149

#1490

#1491

#1492

#1494

#1495

#1496

#1497

#1498

#15

#150

#1502

#1504

#1505

#1506

#151

#1511

#1512

#1514

#1515

#1516

#1517

#1518

#152

#1520

#1523

#1524

#1526

#1527

#153

#1532

#1533

#1537

#1539

#154

#1540

#1541

#1542

#1543

#155

#1550

#1552

#1555

#1556

#1557

#1560

#1563

#1564

#1567

#1568

#1569

#1570

#1571

#1576

#1577

#1578

#1579

#1580

#1583

#1584

#1585

#1586

#1587

#1588

#159

#1591

#1592

#1594

#16

#160

#1603

#1605

#1606

#1607

#1608

#1609

#161

#1610

#1614

#1617

#1618

#1619

#162

#1621

#1626

#1628

#163

#1632

#1637

#1638

#1639

#164

#1646

#1648

#1650

#1651

#1653

#1658

#1660

#1662

#1663

#1664

#1666

#1667

#1668

#167

#1676

#168

#1682

#1685

#1686

#1693

#1697

#1698

#17

#170

#1702

#1703

#1704

#1707

#1708

#1709

#1710

#1713

#1714

#1715

#1716

#1718

#1719

#1726

#1727

#1729

#173

#1730

#1731

#1734

#1735

#1736

#1737

#1739

#174

#1740

#1747

#1748

#1749

#175

#1751

#1755

#1756

#1758

#1759

#1760

#1764

#1765

#1767

#1768

#1773

#1774

#1775

#178

#1784

#1789

#179

#1790

#1791

#1797

#1798

#18

#180

#1800

#1801

#1808

#181

#1811

#1812

#1813

#1815

#1816

#1817

#1818

#1820

#1825

#1827

#1828

#1829

#183

#1830

#1832

#1833

#1835

#1836

#1839

#184

#1840

#1841

#1843

#1844

#1845

#1848

#1849

#185

#1850

#1851

#1854

#1855

#186

#1860

#1865

#1866

#1869

#187

#1882

#1884

#1886

#1888

#1889

#1890

#1892

#1894

#1895

#1898

#19

#190

#1902

#1906

#1908

#1909

#191

#1910

#1912

#1915

#1916

#1917

#1918

#1919

#1920

#1921

#1923

#1924

#1925

#1929

#193

#1931

#1932

#1934

#1935

#1936

#1937

#1938

#1939

#194

#1940

#1942

#1947

#1948

#1949

#1950

#1951

#1953

#1954

#1958

#1959

#196

#1963

#1965

#1967

#1970

#1971

#1975

#1980

#1981

#1985

#1986

#1988

#1989

#1990

#1994

#1995

#1996

#2

#20

#200

#2002

#2003

#2004

#2005

#2006

#2008

#201

#2010

#2011

#2013

#2014

#2015

#2016

#2017

#202

#2020

#2021

#2022

#2023

#2024

#2028

#203

#2031

#2032

#2033

#2034

#2038

#2039

#2044

#2045

#2046

#2047

#2049

#205

#2050

#2052

#2054

#2059

#2060

#2061

#2062

#2063

#2065

#2066

#2068

#207

#2071

#2072

#2074

#2075

#2076

#2078

#2079

#208

#2080

#2084

#2085

#2086

#2087

#2088

#2089

#2091

#2092

#2094

#2095

#2097

#2099

#210

#2101

#2102

#2103

#2104

#2105

#2109

#2110

#2111

#2114

#2115

#2116

#2117

#2118

#2119

#212

#2120

#2123

#2124

#2125

#2127

#2128

#2129

#213

#2131

#2132

#2133

#2134

#2135

#2137

#2138

#214

#2140

#2141

#2142

#2148

#2149

#215

#2151

#2152

#2153

#2155

#2156

#2158

#216

#2161

#2163

#2164

#2165

#2166

#2167

#2168

#2169

#217

#2170

#2173

#2175

#2176

#2178

#2179

#218

#2180

#2181

#2182

#2186

#2187

#2189

#219

#2190

#2191

#2193

#2194

#2196

#22

#220

#2201

#2202

#2203

#2204

#2205

#2208

#2209

#221

#2212

#2213

#2215

#2216

#2217

#222

#2220

#2221

#2224

#2225

#2226

#2228

#2230

#2231

#2232

#2233

#2237

#2242

#2243

#2244

#2245

#2248

#2249

#2250

#2251

#2254

#2255

#2256

#2257

#2258

#2259

#226

#2260

#2261

#2262

#2266

#2268

#2269

#227

#2271

#2272

#2273

#2274

#2276

#2277

#2278

#2279

#228

#2281

#2282

#2283

#2284

#2285

#2286

#2287

#2288

#2289

#2290

#2291

#2292

#2298

#2299

#23

#2300

#2303

#2304

#2306

#2307

#2308

#2309

#2311

#2311

#2313

#2314

#2315

#2317

#2320

#2323

#2325

#2326

#2327

#2328

#2329

#233

#2330

#2331

#2333

#2335

#2336

#2337

#2338

#2339

#234

#2341

#2342

#2343

#2344

#2345

#2346

#2347

#235

#2350

#2351

#2352

#2353

#2354

#2357

#2358

#2359

#2360

#2361

#2364

#2365

#2366

#2367

#2368

#237

#2370

#2371

#2372

#2374

#2377

#2378

#2379

#2381

#2382

#2384

#2385

#2386

#2387

#2389

#2390

#2391

#2392

#2394

#2395

#2396

#2397

#2398

#2399

#24

#2401

#2402

#2403

#2404

#2405

#2406

#2407

#2408

#2410

#2411

#2412

#2414

#2415

#2416

#2419

#242

#2420

#2422

#2423

#2424

#2426

#2427

#2428

#2429

#2430

#2431

#2433

#2437

#2438

#2439

#244

#2442

#2443

#2444

#2449

#2450

#2451

#2453

#2454

#2455

#2459

#246

#2462

#2463

#2468

#2469

#2470

#2471

#2472

#2473

#2477

#2478

#2479

#248

#2481

#2482

#2484

#2486

#2489

#2490

#2491

#2492

#2493

#2494

#2496

#2497

#2498

#2499

#25

#250

#2500

#2501

#2507

#251

#2510

#2511

#2512

#2513

#2514

#2515

#2516

#2517

#2518

#2519

#252

#2520

#2521

#2524

#2525

#2527

#2528

#2529

#2532

#2533

#2535

#2536

#2537

#2538

#2539

#2540

#2545

#2546

#2547

#2548

#255

#2550

#2551

#2552

#2553

#2554

#2555

#2556

#2557

#2558

#2561

#2562

#2563

#2566

#2567

#2568

#257

#2574

#2575

#2577

#2578

#2579

#258

#2580

#2582

#2585

#2586

#2587

#2588

#2589

#259

#2590

#2591

#2592

#2594

#2595

#2596

#2597

#2599

#26

#2600

#2601

#2602

#2603

#2604

#2605

#2606

#2607

#2609

#261

#2610

#2611

#2612

#2614

#2616

#2617

#2619

#262

#2620

#2622

#2623

#2625

#2627

#2628

#2629

#2630

#2631

#2632

#2633

#2634

#2637

#264

#2640

#2642

#2642

#2645

#2646

#2647

#2648

#2650

#2651

#2652

#2655

#2658

#2659

#266

#2661

#2663

#2664

#2665

#2666

#2668

#267

#2673

#2674

#2677

#2678

#2680

#2682

#2683

#2684

#2685

#2686

#2687

#2688

#2689

#269

#2690

#2691

#2692

#2693

#2694

#2695

#2697

#2698

#2699

#27

#2701

#2702

#2704

#2706

#2707

#2708

#2709

#2710

#2711

#2712

#2713

#2714

#2716

#2717

#2718

#2719

#272

#2720

#2721

#2723

#2724

#2725

#2726

#2732

#2733

#2738

#274

#2740

#2741

#2742

#2743

#2745

#2746

#275

#2750

#2751

#2753

#2754

#2755

#2756

#2756

#2758

#276

#2760

#2761

#2762

#2764

#2765

#2766

#2767

#2768

#2769

#277

#2770

#2771

#2772

#2773

#2774

#2777

#2778

#2779

#278

#2782

#2784

#2785

#2786

#2788

#2789

#2790

#2791

#2792

#2793

#2795

#2796

#2797

#2798

#2799

#28

#2802

#2803

#2806

#2807

#2808

#2809

#2810

#2811

#2812

#2814

#2815

#2816

#2817

#2818

#282

#2824

#2825

#2826

#2827

#2829

#2831

#2833

#2834

#2837

#284

#2840

#2841

#2845

#2846

#2848

#2848

#2849

#285

#2850

#2852

#2855

#2856

#2858

#2859

#286

#2861

#2862

#2863

#2866

#2866

#287

#2870

#2874

#2874

#2878

#2882

#2883

#2884

#2885

#2886

#2891

#2892

#2893

#2898

#29

#2901

#2903

#2904

#2905

#2907

#2908

#2910

#2911

#2912

#2913

#2916

#2917

#2918

#2919

#292

#2921

#2922

#2924

#2928

#2929

#2931

#2932

#2933

#2935

#2936

#2937

#2938

#294

#2940

#2941

#2942

#2943

#2945

#2947

#2948

#2949

#2950

#2951

#2953

#2954

#2957

#2958

#2960

#2961

#2962

#2963

#2964

#2964

#2965

#2965

#2966

#2967

#2968

#297

#2970

#2971

#2972

#2975

#2976

#2976

#2977

#2979

#298

#2980

#2981

#2982

#2983

#2988

#299

#2990

#2991

#2992

#2993

#2995

#2996

#2998

#2999

#30

#3000

#3001

#3002

#3003

#3004

#3006

#3007

#3008

#3010

#3012

#3012

#3013

#3015

#3016

#3017

#3018

#302

#3021

#3022

#3024

#3027

#3028

#3029

#303

#3030

#3031

#3032

#3033

#3034

#3034

#3035

#3036

#3037

#3039

#304

#3040

#3041

#3042

#3044

#3045

#3046

#3047

#3048

#3049

#305

#3051

#3053

#3054

#3055

#3056

#3057

#3060

#3061

#3063

#3063

#3064

#3064

#3065

#3065

#3067

#3068

#3069

#3072

#3073

#3074

#3075

#3076

#3077

#3078

#3079

#3079

#308

#3080

#3081

#3083

#3084

#3085

#3086

#3089

#3091

#3092

#3093

#3094

#3095

#3096

#3098

#3099

#31

#310

#3100

#3101

#3103

#3106

#3107

#3108

#3109

#3111

#3111

#3112

#3113

#3116

#3117

#3118

#3120

#3121

#3122

#3124

#3125

#3126

#3127

#3128

#3129

#313

#3131

#3132

#3134

#3136

#3141

#3142

#3144

#3145

#3147

#3148

#3150

#3151

#3152

#3154

#3154

#3156

#3157

#3160

#3161

#3162

#3162

#3164

#3164

#3166

#3166

#3167

#317

#3170

#3171

#3172

#3174

#3176

#3179

#318

#3182

#3184

#3186

#3186

#3187

#3188

#3189

#3193

#3194

#3194

#3196

#3197

#3197

#32

#3200

#3201

#3201

#3202

#3204

#3206

#3207

#3210

#3210

#3211

#3217

#3218

#3220

#3221

#3222

#3223

#3224

#3229

#3230

#3231

#3231

#3232

#3234

#3235

#3236

#3237

#3238

#3239

#3240

#3242

#3244

#3245

#3246

#3249

#325

#3252

#3253

#3254

#3255

#3255

#3256

#3258

#3260

#3261

#3262

#3263

#3264

#3265

#3266

#3266

#3267

#3268

#3269

#327

#3270

#3271

#3273

#3274

#3276

#3276

#328

#3280

#3281

#3282

#3283

#3284

#3286

#3287

#3288

#3288

#329

#3291

#3291

#3292

#3292

#3297

#3298

#33

#3300

#3300

#3302

#3302

#3308

#3310

#3310

#3311

#3312

#3313

#3314

#3315

#3315

#3319

#3322

#3323

#3324

#3325

#3326

#3327

#334

#335

#336

#34

#340

#341

#343

#344

#348

#35

#351

#352

#353

#356

#357

#358

#359

#36

#360

#362

#363

#364

#367

#368

#37

#370

#373

#379

#384

#385

#388

#39

#393

#394

#395

#396

#4

#40

#400

#404

#406

#407

#41

#411

#412

#42

#434

#438

#44

#441

#443

#45

#453

#46

#462

#465

#47

#470

#472

#475

#477

#48

#480

#483

#485

#488

#49

#498

#5

#50

#501

#502

#51

#513

#514

#516

#519

#52

#520

#521

#522

#525

#529

#53

#534

#54

#543

#544

#545

#55

#550

#553

#557

#558

#56

#561

#562

#567

#57

#575

#578

#579

#58

#580

#581

#582

#583

#585

#586

#587

#588

#59

#590

#595

#596

#6

#60

#600

#605

#608

#609

#61

#611

#616

#617

#618

#619

#62

#621

#623

#624

#626

#63

#630

#633

#634

#635

#639

#64

#642

#643

#647

#648

#659

#66

#661

#664

#665

#666

#67

#670

#671

#678

#68

#684

#689

#698

#7

#70

#704

#708

#71

#712

#713

#715

#719

#72

#721

#723

#725

#727

#73

#733

#737

#738

#740

#741

#743

#745

#746

#748

#75

#750

#76

#761

#762

#767

#768

#770

#773

#783

#785

#789

#791

#793

#794

#795

#797

#798

#799

#8

#803

#805

#806

#809

#810

#812

#82

#820

#821

#822

#823

#829

#831

#836

#838

#84

#842

#848

#85

#851

#852

#853

#854

#858

#86

#860

#862

#867

#868

#87

#872

#88

#881

#884

#886

#889

#89

#892

#893

#898

#9

#90

#900

#901

#905

#906

#91

#910

#911

#918

#921

#93

#930

#932

#935

#94

#941

#947

#95

#950

#951

#953

#954

#957

#958

#96

#963

#964

#966

#968

#97

#971

#977

#981

#986

#989

#990

#993

#994

#999

v0.2.0

v0.2.1

v0.3.0

v0.3.1

v0.3.2

v0.4.0

v0.4.1

v0.4.2

v0.4.3

v0.5.0

v0.6.0

v0.7.0

v0.8.0

v0.8.1

v0.8.2

v0.9.0

v0.9.1

v0.9.2

v0.9.3

v0.9.4

v1.0.0

v1.0.1

v1.0.2

v1.0.3

v1.1.0

v1.1.1

v1.2.0

v1.3.0

v1.3.1

v1.3.2

v1.3.3

v1.3.4

v1.4.0

v1.4.1

v1.4.2

v1.4.3

v1.4.4

v1.4.5

v2.0.0

v2.0.1

v2.0.2

v2.0.3

v2.0.4

v2.1.0

v2.1.1

v2.2.0

v2.3.0

v2.3.1

v2.4.0

v2.4.1

v3.0.0

v3.0.1

v3.0.2

v3.1.0

v3.1.1

v3.2.0

v3.2.1

v3.2.2

v3.2.3

v3.3.0

v3.3.1

v3.3.2

v3.3.3

v3.3.4

v3.3.5

73ebbd05f8 Pr 2451 ci branch (#2454) drbh 2024-08-26 20:19:38 -0400
7aebb953e2 Fix: don't apply post layernorm in SiglipVisionTransformer (#2459) drbh 2024-08-26 17:04:46 -0400
92ac02e4f2 nix: add default package (#2453) Daniël de Kok 2024-08-23 22:06:22 +0200
b7d1adc3e9 nix: add awq-inference-engine as server dependency (#2442) Daniël de Kok 2024-08-21 22:20:03 +0200
6654c2d11b Adding eetq to flake. (#2438) Nicolas Patry 2024-08-21 09:06:33 +0200
a5af557359 nix: add text-generation-benchmark to pure devshell (#2431) Daniël de Kok 2024-08-21 07:48:13 +0200
516392d790 nix: add pure server to flake, add both pure and impure devshells (#2430) Daniël de Kok 2024-08-20 22:07:33 +0200
635dde8af9 Prefix caching (#2402) Nicolas Patry 2024-08-20 11:15:30 +0200
ddba272a66 nix: update to CUDA 12.4 (#2429) Daniël de Kok 2024-08-19 09:28:38 +0200
cd208c5043 All integration tests back everywhere (too many failed CI). (#2428) Nicolas Patry 2024-08-16 21:19:46 +0200
53fdbe617d doc: Add metrics documentation and add a 'Reference' section (#2230) Hugo Larcher 2024-08-16 19:43:30 +0200
11d25a4bd3 FIxing the CI. Nicolas Patry 2024-08-16 14:21:29 +0200
85df9fc2db Further fixes. (#2426) Nicolas Patry 2024-08-16 13:21:44 +0200
df0e650891 Improve the Consuming TGI + Streaming docs. (#2412) Vaibhav Srivastav 2024-08-16 12:43:08 +0200
20ed7b598e nix: try to reduce the number of Rust rebuilds (#2424) Daniël de Kok 2024-08-16 10:01:01 +0200
f0181ed2d7 Upgrading the tests to match the current workings. (#2423) Nicolas Patry 2024-08-15 13:28:42 +0200
df6ea89da9 Fixing exl2 and other quanize tests again. (#2419) Nicolas Patry 2024-08-15 11:12:51 +0200
e5c39a5545 nix: build router incrementally (#2422) Daniël de Kok 2024-08-15 10:21:51 +0200
c3401e0b99 More fixes trtllm (#2342) Funtowicz Morgan 2024-08-14 12:02:05 +0200
4baa6ff59f Upgrading exl2. (#2415) Nicolas Patry 2024-08-14 11:58:08 +0200
bae161ab84 nix: partial incremental build of the router (#2416) Daniël de Kok 2024-08-14 11:06:28 +0200
ffc8fb0850 fix: adds causal to attention params (#2408) drbh 2024-08-13 10:19:46 -0400
7a4d831d17 add numa to improve cpu inference perf (#2330) Wang, Yi 2024-08-13 21:33:55 +0800
c5e4c1877b Adding more kernels to flake. (#2411) Nicolas Patry 2024-08-13 10:49:18 +0200
eb561bb715 nix: incremental build of the launcher (#2410) Daniël de Kok 2024-08-13 10:44:15 +0200
10b2be6536 fix: include create_exllama_buffers and set_device for exllama (#2407) drbh 2024-08-12 17:59:37 -0400
1f8c0f83e3 Pr 2395 ci run (#2406) drbh 2024-08-12 14:38:59 -0400
18d6be6af4 Updating the flake. (#2404) Nicolas Patry 2024-08-12 18:09:16 +0200
96e8fa37b0 fix: improve completions to send a final chunk with usage details (#2336) drbh 2024-08-12 11:26:11 -0400
3079865b60 fix: allocate tmp based on sgmv kernel if available (#2345) drbh 2024-08-12 11:24:32 -0400
8e6bfa2fc5 feat: validate template variables before apply and improve sliding wi… (#2403) drbh 2024-08-12 10:58:40 -0400
6393cdee63 Keeping the benchmark somewhere (#2401) Nicolas Patry 2024-08-12 15:22:02 +0200
f586cc7f0c Add support for prefix caching to the v3 router (#2392) Daniël de Kok 2024-08-12 14:59:17 +0200
b8efd6d00c Cpu dockerimage (#2367) Wang, Yi 2024-08-12 20:10:30 +0800
1daaddd072 Fixing import exl2 (#2399) Nicolas Patry 2024-08-12 14:08:59 +0200
fbe59c6267 Adding launcher to build. (#2397) Nicolas Patry 2024-08-12 14:08:46 +0200
8750dc878e Upgrade fbgemm (#2398) Nicolas Patry 2024-08-12 14:08:38 +0200
197dd3af12 nix: add router to the devshell (#2396) Daniël de Kok 2024-08-12 09:28:38 +0200
bb833389e0 Update flake for 9.0a capability in Torch (#2394) Daniël de Kok 2024-08-09 22:36:51 +0200
959add5e9b feat: add guideline to chat request and template (#2391) drbh 2024-08-09 10:56:45 -0400
849bd93dc3 Using an enum for flash backens (paged/flashdecoding/flashinfer) (#2385) Nicolas Patry 2024-08-09 16:41:17 +0200
df719fd527 flake: use rust-overlay (#2390) Daniël de Kok 2024-08-09 15:24:21 +0200
1d4a35a23c Update documentation for Supported models (#2386) Vaibhav Srivastav 2024-08-09 15:01:34 +0200
e9ba044250 flake: add fmt and clippy (#2389) Daniël de Kok 2024-08-09 14:56:20 +0200
afa14b7595 Using HF_HOME instead of CACHE to get token read in addition to models. (#2288) Nicolas Patry 2024-08-09 14:25:44 +0200
dc0fa60f55 Add experimental flake (#2384) Daniël de Kok 2024-08-09 12:32:37 +0200
4a16da5d49 Add FlashInfer support (#2354) Daniël de Kok 2024-08-09 11:42:00 +0200
6f2a468a64 Pr 2352 ci branch (#2382) drbh 2024-08-09 04:54:32 -0400
b1bc0ecb7f Update Quantization docs and minor doc fix. (#2368) Vaibhav Srivastav 2024-08-08 22:06:57 +0200
853fb96fec fix: prefer hidden_activation over hidden_act in gemma2 (#2381) drbh 2024-08-08 14:08:56 -0400
1057f28128 Pr 2337 ci branch (#2379) drbh 2024-08-08 12:30:29 -0400
3893d00927 fix EleutherAI/gpt-neox-20b does not work in tgi (#2346) Wang, Yi 2024-08-09 00:08:52 +0800
06b638f310 Pr 2374 ci branch (#2378) drbh 2024-08-08 11:14:06 -0400
9b1b545bb4 Fix the prefix for OPT model in opt_modelling.py #2370 (CI RUN) (#2371) drbh 2024-08-07 23:14:02 -0400
3ea8e8a2d5 add gptj modeling in TGI #2366 (CI RUN) (#2372) drbh 2024-08-07 21:32:37 -0400
11fab8a20c fix: fix num_ln_in_parallel_attn attribute name typo in RWConfig (#2350) almersawi 2024-08-08 03:45:23 +0400
3ccde430d9 fix: prefer original layernorm names for 180B (#2365) drbh 2024-08-06 15:25:30 -0400
db873be177 fix: default num_ln_in_parallel_attn to one if not supplied (#2364) drbh 2024-08-06 13:33:22 -0400
5400c7155d feat: return the generated text when parsing fails (#2353) drbh 2024-08-06 13:10:19 -0400
b4562e1369 feat: prefer stop over eos_token to align with openai finish_reason (#2344) drbh 2024-08-06 13:09:50 -0400
88e07f12cc feat: implement a templated endpoint for visibility into chat requests (#2333) drbh 2024-08-06 07:51:32 -0400
83d1f23fea fix: return the out tensor rather then the functions return value (#2361) drbh 2024-08-06 07:49:53 -0400
8b0f5feb02 feat: include local lora adapter loading docs (#2359) drbh 2024-08-05 12:36:44 -0400
688321bcc4 fix: attempt forward on flash attn2 to check hardware support (#2335) drbh 2024-08-05 09:11:40 -0400
48fec7b198 Unify attention output handling (#2343) Daniël de Kok 2024-08-01 17:03:28 +0200
ccddb30c02 Fix cache block size for flash decoding (#2351) Daniël de Kok 2024-08-01 15:38:57 +0200
d70da59c25 enable HuggingFaceM4/idefics-9b in intel gpu (#2338) Wang, Yi 2024-08-01 17:08:36 +0800
3c4f816ae3 refactor usage stats (#2339) Erik Kaunismäki 2024-07-31 16:29:07 +0200
c73d1d604f Pr 2290 ci run (#2329) drbh 2024-07-31 10:27:15 -0400
468e5c6874 Handle GPTQ-Marlin loading in GPTQMarlinWeightLoader (#2300) Daniël de Kok 2024-07-31 13:08:41 +0200
120d5773e8 Rebase TRT-llm (#2331) Nicolas Patry 2024-07-31 10:33:10 +0200
247a29f77c server quantize: store quantizer config in standard format (#2299) Daniël de Kok 2024-07-30 15:16:20 +0200
bafab73f76 fix: adjust test snapshots and small refactors (#2323) drbh 2024-07-29 11:38:38 -0400
b1d1d26559 patch-error-on-invalid-grammar (#2282) Erik Kaunismäki 2024-07-29 16:09:25 +0200
a574381cb4 fix: reject grammars without properties (#2309) drbh 2024-07-29 10:07:25 -0400
23a3927eb6 Install Marlin from standalone package (#2320) Daniël de Kok 2024-07-29 15:37:10 +0200
2c1d280fae Run ci api key (#2315) Erik Kaunismäki 2024-07-29 11:14:17 +0200
a87791d7c9 feat: add ruff and resolve issue (#2262) drbh 2024-07-26 10:29:09 -0400
fc6d80fdb8 Support tied embeddings in 0.5B and 1.5B Qwen2 models (#2313) Daniël de Kok 2024-07-26 14:57:24 +0200
1674f441d0 Fix registry name (#2307) Adrien 2024-07-25 16:06:00 +0200
d5e054342e Fixing idefics on g6 tests. (#2306) Nicolas Patry 2024-07-25 14:44:21 +0200
64ffd642fa Some small fixes for the Torch 2.4.0 update (#2304) Daniël de Kok 2024-07-25 13:34:44 +0200
69db13e5e5 Using g6 instead of g5. (#2281) Nicolas Patry 2024-07-25 11:21:17 +0200
7ebee37641 fix: refactor adapter weight loading and mapping (#2193) drbh 2024-07-24 15:32:14 -0400
457791f511 Split up layers.marlin into several files (#2292) Daniël de Kok 2024-07-24 16:33:26 +0200
d93931567d fix of use of unquantized weights in cohere GQA loading, also enable … (#2291) Wang, Yi 2024-07-24 16:44:02 +0800
204142153f fix crash in multi-modal (#2245) Wang, Yi 2024-07-24 16:39:08 +0800
a994f6aedd hotfix: update nccl OlivierDehaene 2024-07-23 23:31:28 +0200
34c472bd64 chore: update to torch 2.4 (#2259) OlivierDehaene 2024-07-23 20:39:43 +0000
b1077b077c hotfix: pin numpy (#2289) Daniël de Kok 2024-07-23 17:53:19 +0200
43f49141fd Add support for Llama 3 rotary embeddings (#2286) Daniël de Kok 2024-07-23 17:18:54 +0200
5390973c09 Preparing for release. (#2285) Nicolas Patry 2024-07-23 16:20:17 +0200
69b67b7add Add support for Mistral-Nemo by supporting head_dim through config (#2254) shaltielshmid 2024-07-23 16:00:07 +0300
26460f053d Add support for repacking AWQ weights for GPTQ-Marlin (#2278) Daniël de Kok 2024-07-23 13:08:20 +0200
919da25c3b fix(l4): fix fp8 logic on l4 (#2277) OlivierDehaene 2024-07-23 09:24:29 +0000
31eb03dbe2 Fixing mistral nemo. (#2276) Nicolas Patry 2024-07-23 11:16:03 +0200
568cc9f3d0 Softcapping for gemma2. (#2273) Nicolas Patry 2024-07-22 18:27:10 +0200
a7515b8af1 fix(server): fix fp8 weight loading (#2268) OlivierDehaene 2024-07-22 15:51:32 +0000
758a8b8423 legacy warning on text_generation client (#2271) Erik Kaunismäki 2024-07-22 12:00:17 +0200
a5aee82a69 Hotfix: fix of use of unquantized weights in Mixtral GQA loading (#2269) icyboy™ 2024-07-22 17:31:00 +0800