Commit Graph

  • b1aff577a0 Worst invention ever. Nicolas Patry 2024-02-14 10:09:00 +0000
  • 0523031ffb ... Nicolas Patry 2024-02-14 10:05:29 +0000
  • 69d1d3cde6 Bash in yaml is not our friend. Nicolas Patry 2024-02-14 10:02:53 +0000
  • e36887cbf5 Install docker manually. Nicolas Patry 2024-02-14 10:00:33 +0000
  • 05aef4dd1a Upgrade install buildx. Nicolas Patry 2024-02-14 09:57:15 +0000
  • 85bf172653 Our runner docker in docker. Nicolas Patry 2024-02-14 09:52:34 +0000
  • 524e06066b Small cleanup. Nicolas Patry 2024-02-14 09:22:38 +0000
  • d6b0fb9e25 Improving mamba runtime by using updates (#1552) Nicolas Patry 2024-02-14 09:54:10 +0100
  • 1ffc3a03c8 Typo. Nicolas Patry 2024-02-13 21:52:02 +0000
  • 7671a419a0 Upgrade intermediary layer for nvidia too. (#1557) Nicolas Patry 2024-02-13 22:46:16 +0100
  • b9ac720d1e Generous snapshot for load because of accumulation errors in the logprobs. Nicolas Patry 2024-02-13 18:15:14 +0000
  • 2e44f082c8 Upgrade intermediary layer for nvidia too. Nicolas Patry 2024-02-13 18:00:00 +0100
  • c54b5c7f04 Remove tailscale. Nicolas Patry 2024-02-13 17:51:12 +0100
  • 6f68bb14c7 Fixing glibc version in the runtime. (#1556) Nicolas Patry 2024-02-13 17:43:47 +0100
  • a83772c87b Self hosted for nvidia too. Nicolas Patry 2024-02-13 17:31:39 +0100
  • c804182300 Fixing glibc version in the runtime. Nicolas Patry 2024-02-13 17:29:03 +0100
  • 31d965bf17 Our runner. Nicolas Patry 2024-02-13 17:15:45 +0100
  • 246ad39d04 feat: add deserialize_with that handles strings or objects with content (#1550) drbh 2024-02-13 10:01:02 -0500
  • d9000a2bcb Update load. Nicolas Patry 2024-02-13 12:11:34 +0000
  • 755ed82d25 Improving mamba runtime by using updates Nicolas Patry 2024-02-13 11:07:25 +0000
  • 91dcfe83db fix: cargo fmt tweak drbh 2024-02-12 11:00:21 -0500
  • a86e726079 fix: remove dev test that relies on local file drbh 2024-02-12 10:53:13 -0500
  • 3db6f0bb39 feat: add deserialize_with that handles strings or objects with content drbh 2024-02-12 09:48:03 -0500
  • 0d794af6a5 feat: experimental support for cuda graphs (#1428) OlivierDehaene 2024-02-12 10:09:29 +0100
  • c85f737454 Fixing AMD dockerfile. Nicolas Patry 2024-02-09 19:27:53 +0000
  • 9a5d97235b Going from earlier release (newer ones have bugs in shape it seems). Nicolas Patry 2024-02-09 16:26:34 +0000
  • 8f93b47395 Upgrade the ubuntu version too. Nicolas Patry 2024-02-09 13:53:07 +0000
  • 72f74bcbc4 Fix for AWQ. Nicolas Patry 2024-02-09 13:09:40 +0000
  • 7143130ba4 Update docs after rebase. Nicolas Patry 2024-02-09 11:44:42 +0000
  • 4b06f318cb Update dockerfile. Nicolas Patry 2024-02-09 11:39:46 +0000
  • 903fbec604 Fixing AWQ. Nicolas Patry 2024-02-09 11:29:34 +0000
  • 3ce42ba7ec Fixing all quantization kernels. Nicolas Patry 2024-02-09 11:12:12 +0000
  • 4b524a305c Update the doc. Nicolas Patry 2024-02-08 09:26:38 +0000
  • bc95292eb8 Disable cuda graph with speculation (for now) and update the docs. Nicolas Patry 2024-02-08 09:22:19 +0000
  • 4fd6e62655 fix OlivierDehaene 2024-01-15 18:24:22 +0100
  • 33e94379c8 fix speculate OlivierDehaene 2024-01-10 17:40:37 +0100
  • ca20c304b3 add log OlivierDehaene 2024-01-10 17:17:48 +0100
  • 8260dc00d8 fix env var OlivierDehaene 2024-01-10 17:17:19 +0100
  • 9904f66966 fix value OlivierDehaene 2024-01-10 16:49:00 +0100
  • 15fdd40587 feat: experimental support for cuda graphs OlivierDehaene 2024-01-10 16:34:39 +0100
  • 1d929a243a fix: use TORCH_NCCL_AVOID_RECORD_STREAMS=1 OlivierDehaene 2024-01-09 17:59:16 +0100
  • 532146338b feat(router): add max_batch_size (#1542) OlivierDehaene 2024-02-09 12:38:41 +0100
  • a4e5801684 ROCm AWQ support (#1514) Ilyas Moutawwakil 2024-02-09 10:45:16 +0100
  • 326f8e30ac Better error message on non rocm. Nicolas Patry 2024-02-09 09:44:53 +0000
  • c5ef81bed5 chore: bump ci rust version (#1543) drbh 2024-02-09 04:32:04 -0500
  • d9ee73eea5 chore: bump ci rust version drbh 2024-02-08 13:07:13 -0500
  • 09b7c26bbd feat(server): add frequency penalty (#1541) OlivierDehaene 2024-02-08 18:41:25 +0100
  • 55e29c9564 my b OlivierDehaene 2024-02-08 17:28:54 +0100
  • 2af011a1c0 use max_size in the batch task OlivierDehaene 2024-02-08 17:26:55 +0100
  • 9e042bd117 update doc OlivierDehaene 2024-02-08 17:12:14 +0100
  • 01e61bb8f6 fix rust test OlivierDehaene 2024-02-08 17:10:36 +0100
  • faaa9dfe0a feat(router): add max_batch_size OlivierDehaene 2024-02-08 17:01:20 +0100
  • a76821e0b2 Update llama gptq. Nicolas Patry 2024-02-08 15:42:33 +0000
  • 81fa53f37b Fix tests. add_batch_dimension Nicolas Patry 2024-02-08 15:25:34 +0000
  • bc157af9b0 generate g_idx only for triton kernel IlyasMoutawwakil 2024-02-08 16:05:09 +0100
  • 40f693b6b9 Fix PR. Nicolas Patry 2024-02-08 15:04:27 +0000
  • e29fb799cb Merge branch 'rocm-awq-support' of https://github.com/huggingface/text-generation-inference into rocm-awq-support IlyasMoutawwakil 2024-02-08 16:03:17 +0100
  • 04d38a83be Updating the tests. Nicolas Patry 2024-02-08 14:59:35 +0000
  • cfacf91af8 fix logits processor OlivierDehaene 2024-02-08 12:49:24 +0100
  • 75b492d720 feat(server): add frequency penalty OlivierDehaene 2024-02-08 12:46:39 +0100
  • 2629193efa log message IlyasMoutawwakil 2024-02-05 09:26:47 +0100
  • 76834c9989 none g_idx IlyasMoutawwakil 2024-02-02 14:42:42 +0100
  • bbe5bedea5 pass g_idx instead of changing triton kernel IlyasMoutawwakil 2024-02-02 14:34:15 +0100
  • 646ab28285 typing IlyasMoutawwakil 2024-02-01 19:37:02 +0000
  • 8074c40473 adapt awq weights to exllama/gptq kernels IlyasMoutawwakil 2024-02-01 18:35:41 +0000
  • 212fdfffad revert changes IlyasMoutawwakil 2024-02-01 18:35:04 +0000
  • 3ceeb85842 fix missing g_idx and eventual overflow in triton kernel IlyasMoutawwakil 2024-02-01 13:30:43 +0000
  • 3963074ceb add triton fallback to awq IlyasMoutawwakil 2024-02-01 13:30:13 +0000
  • aa2014fc79 post process exllama model IlyasMoutawwakil 2024-02-01 12:48:17 +0100
  • 75086526d3 awq fallback to exllama IlyasMoutawwakil 2024-02-01 12:06:02 +0100
  • 461dd6f1c7 fix exllama overflows IlyasMoutawwakil 2024-02-01 12:05:36 +0100
  • 39af000cb9 Update to peft 0.8.2 (#1537) Jason Stillerman 2024-02-08 06:44:04 -0500
  • bd405e035b Impl simple mamba model (#1480) drbh 2024-02-08 04:19:45 -0500
  • b99f784cb3 feat: conditionally include mamba drbh 2024-02-08 00:34:13 +0000
  • 1734540211 feat: use existing add_generation_prompt variable from config in temp… (#1533) drbh 2024-02-07 03:35:53 -0500
  • 2c6ef7c93a fix: add missing accepted_ids to batch_top_tokens drbh 2024-02-07 03:57:35 +0000
  • 48624fee25 Merge branch 'impl-simple-mamba-model' of github.com:huggingface/text-generation-inference into impl-simple-mamba-model drbh 2024-02-07 03:24:32 +0000
  • deed8e8154 fix: adjust typos and docker build drbh 2024-02-07 03:24:28 +0000
  • 9146ba00a7 Merge branch 'main' into impl-simple-mamba-model drbh 2024-02-06 18:38:20 -0500
  • 5b30a425f6 fix: update selective state Makefile drbh 2024-02-06 23:37:00 +0000
  • 50ca04b052 feat: update docker for mamba drbh 2024-02-06 21:15:16 +0000
  • 36a4853c4e fix: rename tests and snapshots drbh 2024-02-06 20:39:52 +0000
  • 5e102183d8 feat: prefer triton ops and batch conv drbh 2024-02-06 20:38:28 +0000
  • e10530d4f3 update to peft 0.8.2 update_peft Jason Stillerman 2024-02-06 14:41:15 -0500
  • 8319e854c8 Fix mamba load. Nicolas Patry 2024-02-06 18:57:24 +0000
  • 53b6b8bd08 feat: update and add tests for add_generation_prompt drbh 2024-02-06 11:43:43 -0500
  • ff0428a351 feat: defaults add_generation_prompt true drbh 2024-02-06 09:08:35 -0500
  • 3caa9b9cb7 feat: support batching drbh 2024-02-06 01:22:25 +0000
  • 63bc4c59d4 fix: improve step to use batch drbh 2024-02-06 00:17:04 +0000
  • a4f1916a56 feat: avoid triton selective_state_update drbh 2024-02-05 21:34:28 +0000
  • 76093c79ac feat: use existing add_generation_prompt variable from config in template drbh 2024-02-05 10:01:40 -0500
  • 29a8d5a3a1 Clippy. Nicolas Patry 2024-02-05 14:39:45 +0100
  • e1dc168188 Adding batch_dimension_flag (to be used for Neuron and other forced padding targets). Nicolas Patry 2024-02-05 14:29:32 +0100
  • cda5751b41 log message IlyasMoutawwakil 2024-02-05 09:26:47 +0100
  • 0f124cbc52 fix: revise non batching tests drbh 2024-02-03 05:04:00 +0000
  • 3a42765cab feat: use cache when decoding drbh 2024-02-02 21:50:51 +0000
  • 0da00be52c feat: add ie update to message docs (#1523) drbh 2024-02-02 10:31:11 -0500
  • 58ddedec16 Update docs/source/messages_api.md drbh 2024-02-02 09:58:29 -0500
  • af2c589cef none g_idx IlyasMoutawwakil 2024-02-02 14:42:42 +0100
  • 994ed8e10d pass g_idx instead of changing triton kernel IlyasMoutawwakil 2024-02-02 14:34:15 +0100