text-generation-inference

huggingface/text-generation-inference

Fork 0

mirror of https://github.com/huggingface/text-generation-inference.git synced 2025-09-12 04:44:52 +00:00

Commit Graph

Select branches

Hide Pull Requests

20250708-ci-fixes

add-chat-response-format

add-google-cloud-provider

add-quickstart-script

add-rotary-embed-tests

add-small-ttft-script

add-test-for-warmup-and-kvcache

add_L4

add_api_key

add_batch_dimension

add_chunked_atn

add_chunked_attn

add_deepseekv3

add_gptq_docs

add_integration_test

add_readme_dashboard

add_tunable_prefill

add_vlm_chunking

adding_docs

adjust-mllama-test-output

adjust-where-request-max-tokens-is-defaulted

aiter_kernels

amd-ci-fx

auto_length

automodel-supports-flash-paged-attention

avoid-cuda-graph-during-warmup-if-oom

avoid-zero-seed

backends/trtllm

backends/trtllm-executor

baichuan2-13b

bnb4

bugfix/add_tools_prompt

bugfix/moe-kernels-imports

bugfix/phi-exl2

bump-client-0.6.2

bump-kernel-versions

bump-poetry-and-requirements

chunked_attn_l4

ci-amihalik-update-chat-completion-messages

ci-new-cluster

ci-patch

ci-run-openai-function-calling-compatible-support

ci-update_xpu_image

ci-xpu

ci-xpu2

ci2

ci_amd

ci_amd2

ci_amd3

ci_amd4

close_dl_thread

compat_logger

cuda_ipc_allreduce

debug-gpt2

debug-request-id

debug-torch-23

debug/gemma2

debugging-timeouts

deploy/aml

dev

development-guide

dummy

enable-non-grammar-constrained-tools

enable-qwen2vl-video

enable-transformers-vlm

enable_non_divisible_embeddings

exl2

experiment/moe

explore-static-triton-kernels

explore-t4-gemma-issues

feat-backend-llamacpp

feat/add-load-test

feat/attention_sinks

feat/backend_abstraction

feat/backend_feature

feat/better_tokens

feat/cuda_12

feat/flash_decoding

feat/improve_max_tokens

feat/max_queue_size

feat/page_re_alloc

feat/parse_logs

feat/support_deepspeed

feature/machete

feature/moe-kernels

feature/no_repeat_ngram_size

feature/no_repeat_ngram_size_ci

feature/phi-3-small

feature/prefix

feature/radix-prefix-cache

feature/radix-prefix-cache-bench

feature/vlm-prefix-caching

fix-cudagraph-bug

fix-gemma-tokenization

fix-grammar-cleanup-bug

fix-grammar-fsm-batching

fix-mixtral-adapter-loading

fix-release-tests

fix-repack-for-marlin

fix-tool-call-def

fix-tp

fix-version-install

fix/allow-top-p-0

fix/avoid_record_streams

fix/op-trace-id

fix/parse-mamba-config

fix_default_arg

fix_exl2

fix_fp8_llama3.2

fix_leak

fix_mistral2

fix_neox_rotary_emb

fix_phi3

flashinfer

flashinfer-0.2.5

fp8_kvcache

fp8_rocm

gaudi_llama4_tmp

gha_sccache_use_secrets

git_2.0.4

git_3.1.1

git_3.2.0

git_3.2.1

git_3.3.0

git_3.3.1

git_3.3.2

git_v2.1.0

git_v2.1.1

git_v2.2.0

git_v2.3.0

git_v2.3.1

git_v2.4.0

git_v2.4.1

git_v3.0.0

git_v3.0.1

git_v3.0.2

git_v3.1.0

git_v3.2.2

git_v3.2.3

git_v3.3.3

git_v3.3.4

git_v3.3.5

improve-docs

improve-dynamic-message-content

improve-json-schema-field

improve-tool-call-and-response-ids

improve_defaults

improve_launcher_defaults

inlcude-latest-release-on-commit-builds-tags

ipex-moe

kvrouter

kvrouter-endpoints

llama-fused-compiled-mlp

main

maintenance/docker-network

maintenance/merge-vlm-input-prep

mamba2

martinigoyanes-fix-frequency-penalty

medusa

megatron

message-more-info

mi300-temp

mllama

model_compat_log

more_logs

multi-lora

new_minor_version

nix/cargo-clippy

nix/docker2

nix/pytorch-2.5.1

nix_integration_tests

nix_test2

no_root_user

no_root_user2

op-compilation-benchmarking

origin/slind_window_fix

osanseviero-patch-1

patch_version_3.3.6

pip-installable

pr-1869-ci-run

pr-2076-ci-run

pr-2290-ci-runner

pr-2366-ci-branch

pr-2444-ci-branch

pr-2517-ci-branch

pr-2711-ci-branch

pr-2784-ci-branch

pr-2840-ci-branch

pr-2954-ci-branch

pr-3002-ci-branch

pr-3004-ci-branch

pr-3018-ci-branch

precompile-kernels-workflow

prefix_chunk

prefix_default

proxy_sse_engine_state

quantization

quantization-0.1

refactor-lora-linear

release-3.2.4

remove_post_load_weights

response-header-metrics

revert

rocm-ci-build

rocm_6.2_fixes

router-grammar-compile

s3-cache

self-generating-docs

set-num-blocks

simpler_exllama

skip-mistral-test

speculative

streaming_conceptual

support-granite-vision

support-logit-bias-in-chat

support-phi-model

support-phi3-small

support-pre-compile-kernels

temp_work

test-batch-speedup-amount

test_docs

test_rocm

tmp_invariants

tmp_medusa

tmp_torch_compile

transformers-ci

triton_fix

trtllm-stop-words

trtllm/executor_stats

tuna

update-jsonschema

update_docs2

update_internal_version

update_peft

update_readme

upgrade-outlines

upgrade_mlp_speculator

use_g6

use_updated_kernels

vllm/setup

zstd

#1

#100

#101

#1010

#1018

#1019

#102

#1022

#1023

#1024

#103

#1033

#1034

#1042

#1044

#1045

#1048

#1049

#1052

#1054

#1058

#1059

#106

#1060

#1061

#1063

#1064

#1065

#1066

#1068

#107

#1070

#1071

#1075

#1076

#1077

#108

#1080

#1081

#1089

#109

#1090

#1091

#1092

#1094

#1096

#1097

#1099

#11

#110

#1100

#1101

#1102

#1103

#1105

#1110

#1112

#1116

#1123

#1128

#1134

#114

#1140

#1141

#115

#1153

#1155

#116

#1165

#1165

#117

#1173

#1176

#1178

#1179

#118

#1182

#1183

#1184

#1187

#119

#1198

#1202

#1211

#1214

#1219

#122

#1224

#1228

#123

#1239

#1241

#1242

#1243

#1246

#1252

#126

#1260

#1267

#1270

#1272

#1274

#1276

#1279

#128

#1285

#1287

#129

#1294

#1295

#13

#130

#1301

#1305

#1307

#1308

#1313

#132

#1326

#1328

#133

#1336

#1337

#134

#1341

#1343

#1346

#1347

#1348

#135

#1351

#1352

#1353

#1358

#136

#1361

#1364

#137

#1370

#1373

#138

#1381

#1386

#139

#1390

#1395

#14

#140

#1408

#141

#1414

#1419

#142

#1420

#1424

#1425

#1427

#1428

#143

#1436

#144

#1442

#1448

#145

#1450

#1453

#1454

#1455

#1459

#1461

#1462

#1463

#1469

#147

#1470

#1471

#1473

#1475

#1476

#1477

#1478

#148

#1480

#1484

#1486

#1488

#1489

#149

#1490

#1491

#1492

#1494

#1495

#1496

#1497

#1498

#15

#150

#1502

#1504

#1505

#1506

#151

#1511

#1512

#1514

#1515

#1516

#1517

#1518

#152

#1520

#1523

#1524

#1526

#1527

#153

#1532

#1533

#1537

#1539

#154

#1540

#1541

#1542

#1543

#155

#1550

#1552

#1555

#1556

#1557

#1560

#1563

#1564

#1567

#1568

#1569

#1570

#1571

#1576

#1577

#1578

#1579

#1580

#1583

#1584

#1585

#1586

#1587

#1588

#159

#1591

#1592

#1594

#16

#160

#1603

#1605

#1606

#1607

#1608

#1609

#161

#1610

#1614

#1617

#1618

#1619

#162

#1621

#1626

#1628

#163

#1632

#1637

#1638

#1639

#164

#1646

#1648

#1650

#1651

#1653

#1658

#1660

#1662

#1663

#1664

#1666

#1667

#1668

#167

#1676

#168

#1682

#1685

#1686

#1693

#1697

#1698

#17

#170

#1702

#1703

#1704

#1707

#1708

#1709

#1710

#1713

#1714

#1715

#1716

#1718

#1719

#1726

#1727

#1729

#173

#1730

#1731

#1734

#1735

#1736

#1737

#1739

#174

#1740

#1747

#1748

#1749

#175

#1751

#1755

#1756

#1758

#1759

#1760

#1764

#1765

#1767

#1768

#1773

#1774

#1775

#178

#1784

#1789

#179

#1790

#1791

#1797

#1798

#18

#180

#1800

#1801

#1808

#181

#1811

#1812

#1813

#1815

#1816

#1817

#1818

#1820

#1825

#1827

#1828

#1829

#183

#1830

#1832

#1833

#1835

#1836

#1839

#184

#1840

#1841

#1843

#1844

#1845

#1848

#1849

#185

#1850

#1851

#1854

#1855

#186

#1860

#1865

#1866

#1869

#187

#1882

#1884

#1886

#1888

#1889

#1890

#1892

#1894

#1895

#1898

#19

#190

#1902

#1906

#1908

#1909

#191

#1910

#1912

#1915

#1916

#1917

#1918

#1919

#1920

#1921

#1923

#1924

#1925

#1929

#193

#1931

#1932

#1934

#1935

#1936

#1937

#1938

#1939

#194

#1940

#1942

#1947

#1948

#1949

#1950

#1951

#1953

#1954

#1958

#1959

#196

#1963

#1965

#1967

#1970

#1971

#1975

#1980

#1981

#1985

#1986

#1988

#1989

#1990

#1994

#1995

#1996

#2

#20

#200

#2002

#2003

#2004

#2005

#2006

#2008

#201

#2010

#2011

#2013

#2014

#2015

#2016

#2017

#202

#2020

#2021

#2022

#2023

#2024

#2028

#203

#2031

#2032

#2033

#2034

#2038

#2039

#2044

#2045

#2046

#2047

#2049

#205

#2050

#2052

#2054

#2059

#2060

#2061

#2062

#2063

#2065

#2066

#2068

#207

#2071

#2072

#2074

#2075

#2076

#2078

#2079

#208

#2080

#2084

#2085

#2086

#2087

#2088

#2089

#2091

#2092

#2094

#2095

#2097

#2099

#210

#2101

#2102

#2103

#2104

#2105

#2109

#2110

#2111

#2114

#2115

#2116

#2117

#2118

#2119

#212

#2120

#2123

#2124

#2125

#2127

#2128

#2129

#213

#2131

#2132

#2133

#2134

#2135

#2137

#2138

#214

#2140

#2141

#2142

#2148

#2149

#215

#2151

#2152

#2153

#2155

#2156

#2158

#216

#2161

#2163

#2164

#2165

#2166

#2167

#2168

#2169

#217

#2170

#2173

#2175

#2176

#2178

#2179

#218

#2180

#2181

#2182

#2186

#2187

#2189

#219

#2190

#2191

#2193

#2194

#2196

#22

#220

#2201

#2202

#2203

#2204

#2205

#2208

#2209

#221

#2212

#2213

#2215

#2216

#2217

#222

#2220

#2221

#2224

#2225

#2226

#2228

#2230

#2231

#2232

#2233

#2237

#2242

#2243

#2244

#2245

#2248

#2249

#2250

#2251

#2254

#2255

#2256

#2257

#2258

#2259

#226

#2260

#2261

#2262

#2266

#2268

#2269

#227

#2271

#2272

#2273

#2274

#2276

#2277

#2278

#2279

#228

#2281

#2282

#2283

#2284

#2285

#2286

#2287

#2288

#2289

#2290

#2291

#2292

#2298

#2299

#23

#2300

#2303

#2304

#2306

#2307

#2308

#2309

#2311

#2311

#2313

#2314

#2315

#2317

#2320

#2323

#2325

#2326

#2327

#2328

#2329

#233

#2330

#2331

#2333

#2335

#2336

#2337

#2338

#2339

#234

#2341

#2342

#2343

#2344

#2345

#2346

#2347

#235

#2350

#2351

#2352

#2353

#2354

#2357

#2358

#2359

#2360

#2361

#2364

#2365

#2366

#2367

#2368

#237

#2370

#2371

#2372

#2374

#2377

#2378

#2379

#2381

#2382

#2384

#2385

#2386

#2387

#2389

#2390

#2391

#2392

#2394

#2395

#2396

#2397

#2398

#2399

#24

#2401

#2402

#2403

#2404

#2405

#2406

#2407

#2408

#2410

#2411

#2412

#2414

#2415

#2416

#2419

#242

#2420

#2422

#2423

#2424

#2426

#2427

#2428

#2429

#2430

#2431

#2433

#2437

#2438

#2439

#244

#2442

#2443

#2444

#2449

#2450

#2451

#2453

#2454

#2455

#2459

#246

#2462

#2463

#2468

#2469

#2470

#2471

#2472

#2473

#2477

#2478

#2479

#248

#2481

#2482

#2484

#2486

#2489

#2490

#2491

#2492

#2493

#2494

#2496

#2497

#2498

#2499

#25

#250

#2500

#2501

#2507

#251

#2510

#2511

#2512

#2513

#2514

#2515

#2516

#2517

#2518

#2519

#252

#2520

#2521

#2524

#2525

#2527

#2528

#2529

#2532

#2533

#2535

#2536

#2537

#2538

#2539

#2540

#2545

#2546

#2547

#2548

#255

#2550

#2551

#2552

#2553

#2554

#2555

#2556

#2557

#2558

#2561

#2562

#2563

#2566

#2567

#2568

#257

#2574

#2575

#2577

#2578

#2579

#258

#2580

#2582

#2585

#2586

#2587

#2588

#2589

#259

#2590

#2591

#2592

#2594

#2595

#2596

#2597

#2599

#26

#2600

#2601

#2602

#2603

#2604

#2605

#2606

#2607

#2609

#261

#2610

#2611

#2612

#2614

#2616

#2617

#2619

#262

#2620

#2622

#2623

#2625

#2627

#2628

#2629

#2630

#2631

#2632

#2633

#2634

#2637

#264

#2640

#2642

#2642

#2645

#2646

#2647

#2648

#2650

#2651

#2652

#2655

#2658

#2659

#266

#2661

#2663

#2664

#2665

#2666

#2668

#267

#2673

#2674

#2677

#2678

#2680

#2682

#2683

#2684

#2685

#2686

#2687

#2688

#2689

#269

#2690

#2691

#2692

#2693

#2694

#2695

#2697

#2698

#2699

#27

#2701

#2702

#2704

#2706

#2707

#2708

#2709

#2710

#2711

#2712

#2713

#2714

#2716

#2717

#2718

#2719

#272

#2720

#2721

#2723

#2724

#2725

#2726

#2732

#2733

#2738

#274

#2740

#2741

#2742

#2743

#2745

#2746

#275

#2750

#2751

#2753

#2754

#2755

#2756

#2756

#2758

#276

#2760

#2761

#2762

#2764

#2765

#2766

#2767

#2768

#2769

#277

#2770

#2771

#2772

#2773

#2774

#2777

#2778

#2779

#278

#2782

#2784

#2785

#2786

#2788

#2789

#2790

#2791

#2792

#2793

#2795

#2796

#2797

#2798

#2799

#28

#2802

#2803

#2806

#2807

#2808

#2809

#2810

#2811

#2812

#2814

#2815

#2816

#2817

#2818

#282

#2824

#2825

#2826

#2827

#2829

#2831

#2833

#2834

#2837

#284

#2840

#2841

#2845

#2846

#2848

#2848

#2849

#285

#2850

#2852

#2855

#2856

#2858

#2859

#286

#2861

#2862

#2863

#2866

#2866

#287

#2870

#2874

#2874

#2878

#2882

#2883

#2884

#2885

#2886

#2891

#2892

#2893

#2898

#29

#2901

#2903

#2904

#2905

#2907

#2908

#2910

#2911

#2912

#2913

#2916

#2917

#2918

#2919

#292

#2921

#2922

#2924

#2928

#2929

#2931

#2932

#2933

#2935

#2936

#2937

#2938

#294

#2940

#2941

#2942

#2943

#2945

#2947

#2948

#2949

#2950

#2951

#2953

#2954

#2957

#2958

#2960

#2961

#2962

#2963

#2964

#2964

#2965

#2965

#2966

#2967

#2968

#297

#2970

#2971

#2972

#2975

#2976

#2976

#2977

#2979

#298

#2980

#2981

#2982

#2983

#2988

#299

#2990

#2991

#2992

#2993

#2995

#2996

#2998

#2999

#30

#3000

#3001

#3002

#3003

#3004

#3006

#3007

#3008

#3010

#3012

#3012

#3013

#3015

#3016

#3017

#3018

#302

#3021

#3022

#3024

#3027

#3028

#3029

#303

#3030

#3031

#3032

#3033

#3034

#3034

#3035

#3036

#3037

#3039

#304

#3040

#3041

#3042

#3044

#3045

#3046

#3047

#3048

#3049

#305

#3051

#3053

#3054

#3055

#3056

#3057

#3060

#3061

#3063

#3063

#3064

#3064

#3065

#3065

#3067

#3068

#3069

#3072

#3073

#3074

#3075

#3076

#3077

#3078

#3079

#3079

#308

#3080

#3081

#3083

#3084

#3085

#3086

#3089

#3091

#3092

#3093

#3094

#3095

#3096

#3098

#3099

#31

#310

#3100

#3101

#3103

#3106

#3107

#3108

#3109

#3111

#3111

#3112

#3113

#3116

#3117

#3118

#3120

#3121

#3122

#3124

#3125

#3126

#3127

#3128

#3129

#313

#3131

#3132

#3134

#3136

#3141

#3142

#3144

#3145

#3147

#3148

#3150

#3151

#3152

#3154

#3154

#3156

#3157

#3160

#3161

#3162

#3162

#3164

#3164

#3166

#3166

#3167

#317

#3170

#3171

#3172

#3174

#3176

#3179

#318

#3182

#3184

#3186

#3186

#3187

#3188

#3189

#3193

#3194

#3194

#3196

#3197

#3197

#32

#3200

#3201

#3201

#3202

#3204

#3206

#3207

#3210

#3210

#3211

#3217

#3218

#3220

#3221

#3222

#3223

#3224

#3229

#3230

#3231

#3231

#3232

#3234

#3235

#3236

#3237

#3238

#3239

#3240

#3242

#3244

#3245

#3246

#3249

#325

#3252

#3253

#3254

#3255

#3255

#3256

#3258

#3260

#3261

#3262

#3263

#3264

#3265

#3266

#3266

#3267

#3268

#3269

#327

#3270

#3271

#3273

#3274

#3276

#3276

#328

#3280

#3281

#3282

#3283

#3284

#3286

#3287

#3288

#3288

#329

#3291

#3291

#3292

#3292

#3297

#3298

#33

#3300

#3300

#3302

#3302

#3308

#3310

#3310

#3311

#3312

#3313

#3314

#3315

#3315

#3319

#3322

#3323

#3324

#3325

#3326

#3327

#334

#335

#336

#34

#340

#341

#343

#344

#348

#35

#351

#352

#353

#356

#357

#358

#359

#36

#360

#362

#363

#364

#367

#368

#37

#370

#373

#379

#384

#385

#388

#39

#393

#394

#395

#396

#4

#40

#400

#404

#406

#407

#41

#411

#412

#42

#434

#438

#44

#441

#443

#45

#453

#46

#462

#465

#47

#470

#472

#475

#477

#48

#480

#483

#485

#488

#49

#498

#5

#50

#501

#502

#51

#513

#514

#516

#519

#52

#520

#521

#522

#525

#529

#53

#534

#54

#543

#544

#545

#55

#550

#553

#557

#558

#56

#561

#562

#567

#57

#575

#578

#579

#58

#580

#581

#582

#583

#585

#586

#587

#588

#59

#590

#595

#596

#6

#60

#600

#605

#608

#609

#61

#611

#616

#617

#618

#619

#62

#621

#623

#624

#626

#63

#630

#633

#634

#635

#639

#64

#642

#643

#647

#648

#659

#66

#661

#664

#665

#666

#67

#670

#671

#678

#68

#684

#689

#698

#7

#70

#704

#708

#71

#712

#713

#715

#719

#72

#721

#723

#725

#727

#73

#733

#737

#738

#740

#741

#743

#745

#746

#748

#75

#750

#76

#761

#762

#767

#768

#770

#773

#783

#785

#789

#791

#793

#794

#795

#797

#798

#799

#8

#803

#805

#806

#809

#810

#812

#82

#820

#821

#822

#823

#829

#831

#836

#838

#84

#842

#848

#85

#851

#852

#853

#854

#858

#86

#860

#862

#867

#868

#87

#872

#88

#881

#884

#886

#889

#89

#892

#893

#898

#9

#90

#900

#901

#905

#906

#91

#910

#911

#918

#921

#93

#930

#932

#935

#94

#941

#947

#95

#950

#951

#953

#954

#957

#958

#96

#963

#964

#966

#968

#97

#971

#977

#981

#986

#989

#990

#993

#994

#999

v0.2.0

v0.2.1

v0.3.0

v0.3.1

v0.3.2

v0.4.0

v0.4.1

v0.4.2

v0.4.3

v0.5.0

v0.6.0

v0.7.0

v0.8.0

v0.8.1

v0.8.2

v0.9.0

v0.9.1

v0.9.2

v0.9.3

v0.9.4

v1.0.0

v1.0.1

v1.0.2

v1.0.3

v1.1.0

v1.1.1

v1.2.0

v1.3.0

v1.3.1

v1.3.2

v1.3.3

v1.3.4

v1.4.0

v1.4.1

v1.4.2

v1.4.3

v1.4.4

v1.4.5

v2.0.0

v2.0.1

v2.0.2

v2.0.3

v2.0.4

v2.1.0

v2.1.1

v2.2.0

v2.3.0

v2.3.1

v2.4.0

v2.4.1

v3.0.0

v3.0.1

v3.0.2

v3.1.0

v3.1.1

v3.2.0

v3.2.1

v3.2.2

v3.2.3

v3.3.0

v3.3.1

v3.3.2

v3.3.3

v3.3.4

v3.3.5

19ea85f8dc

Updating the flake. (#2404) Nicolas Patry 2024-08-12 18:09:16 +0200
93c061ac79

Updating the flake. Nicolas Patry 2024-08-12 17:53:26 +0200
b84bb19ece fix: prefer recent gptq changes fix-release-tests drbh 2024-08-12 15:51:19 +0000
7e773b0f20 fix: superseed gptq changes with main drbh 2024-08-12 15:18:02 +0000
3f12750a18 fix: marlin repeat scale for fp8 and bump snapshots drbh 2024-08-09 16:39:16 +0000
df9eb38733 fix: include correct exllama methods based on version drbh 2024-08-08 20:42:41 +0000
e99dd84b9a fix: move GPTQWeight into file to avoid circular import drbh 2024-08-08 19:52:23 +0000
700e64c5b9 fix: update mamba snap and run other release tests drbh 2024-08-08 17:55:24 +0000
add7908f0f fix: update mt0, mamba and grammar tests drbh 2024-08-08 15:17:45 +0000
c3e358e8b5 fix: update deepseek and gemma tests drbh 2024-08-08 14:29:28 +0000
57efa7ab8f fix: run bloom in non release and update snapshots drbh 2024-08-08 13:47:49 +0000
30395b09f4

fix: improve completions to send a final chunk with usage details (#2336) drbh 2024-08-12 11:26:11 -0400
4c3f8a70a1

fix: allocate tmp based on sgmv kernel if available (#2345) drbh 2024-08-12 11:24:32 -0400
d5d168a4d2 test throughput Xuan Son Nguyen 2024-08-12 17:23:11 +0200
155f9c98e2

feat: validate template variables before apply and improve sliding wi… (#2403) drbh 2024-08-12 10:58:40 -0400
298efa41c5 fix: improve missing template var test drbh 2024-08-12 14:35:42 +0000
2551456fff feat: validate template variables before apply and improve sliding window check drbh 2024-08-12 14:16:09 +0000
136bcc8128

Keeping the benchmark somewhere (#2401) Nicolas Patry 2024-08-12 15:22:02 +0200
bf6d60a07b

Keeping the benchmark somewhere Daniël de Kok 2024-08-06 12:36:15 +0000
8deeaca4ff

Add support for prefix caching to the v3 router (#2392) Daniël de Kok 2024-08-12 14:59:17 +0200
b6bb1d5160

Cpu dockerimage (#2367) Wang, Yi 2024-08-12 20:10:30 +0800
84bc3d7b7d

Fixing import exl2 (#2399) Nicolas Patry 2024-08-12 14:08:59 +0200
730fa00e20

Adding launcher to build. (#2397) Nicolas Patry 2024-08-12 14:08:46 +0200
9c739651cd

Upgrade fbgemm (#2398) Nicolas Patry 2024-08-12 14:08:38 +0200
5f002c678f

Fixing import exl2 Nicolas Patry 2024-08-12 12:23:46 +0200
7fde42c6e8

Fix fbgemm version Nicolas Patry 2024-08-12 11:50:02 +0200
b727e0aedc only 10 VUs Xuan Son Nguyen 2024-08-12 11:45:30 +0200
fc1853adac

Upgrade fbgemm Nicolas Patry 2024-08-12 11:29:13 +0200
7e694bbab7

Adding launcher to build. Nicolas Patry 2024-08-12 09:42:58 +0200
01a515dea2

nix: add router to the devshell (#2396) Daniël de Kok 2024-08-12 09:28:38 +0200
ccf4995744 nix: add router to the devshell Daniël de Kok 2024-08-12 06:41:39 +0000
d403575c43

Make bf16 default for hpu, fix script (#205) Abhilash Majumder 2024-08-11 14:18:35 +0530
cf2ff5a1dd

Revert PR#178 (#191) Sun Choi 2024-08-11 00:29:30 -0700
535335f088

fix(router): Fix appending to message content Simone Rossi 2024-08-10 17:53:59 +0200
a41e974c3b

Merge branch 'habana-main' into v2.0.4 regisss 2024-08-10 12:54:00 +0200
7bc16deb48 wip: debug gemma and flash explore-t4-gemma-issues drbh 2024-08-09 23:08:54 +0000
8dcc7d3f6b

Update flake for 9.0a capability in Torch (#2394) Daniël de Kok 2024-08-09 22:36:51 +0200
7a6a6e5cc2 Update flake for 9.0a capability in Torch Daniël de Kok 2024-08-09 20:30:15 +0000
7825c0744a fix: update openapi schema drbh 2024-08-09 20:02:47 +0000
e57b6cccda fix: remove dev debug trait and unneeded mut drbh 2024-08-09 15:48:55 -0400
515cd66705 fix: include finish reason string drbh 2024-07-30 21:07:42 +0000
c330491223 fix: improve completions to send a final chunk with usage details drbh 2024-07-30 21:02:32 +0000
7101bf2993 fix: re add copy build artifacts step for punica kernels drbh 2024-08-09 16:53:25 +0000
88ac607f1c

Moving the docs. Nicolas Patry 2024-08-09 18:15:12 +0200
8140b2294f

Bad rebase Nicolas Patry 2024-08-09 18:11:43 +0200
173e5e6c4b

fix: Message API link Hugo Larcher 2024-07-16 11:23:02 +0200
3c912f40bb

doc: Refactor API reference Hugo Larcher 2024-07-15 20:22:22 +0200
5e70943ed0

doc: Add API reference Hugo Larcher 2024-07-15 18:16:22 +0200
fc2d1134b8

doc: Add metrics documentation and add a 'Reference' section Hugo Larcher 2024-07-15 14:15:55 +0200
5746a8d0c3 Add support for prefix caching to the v3 router Daniël de Kok 2024-08-09 14:54:13 +0000
0d06aed02d

feat: add guideline to chat request and template (#2391) drbh 2024-08-09 10:56:45 -0400
7735b385dc Prefix caching WIP feature/radix-prefix-cache Daniël de Kok 2024-08-09 11:47:14 +0000
7a48a84784

Using an enum for flash backens (paged/flashdecoding/flashinfer) (#2385) Nicolas Patry 2024-08-09 16:41:17 +0200
d94b0fcf52 fix: add template test and update docs drbh 2024-08-09 14:16:35 +0000
3b25cd3213 feat: add guideline to chat request and template drbh 2024-08-09 13:53:47 +0000
6e127dcc96

flake: use rust-overlay (#2390) Daniël de Kok 2024-08-09 15:24:21 +0200
f2c5fb6cbe flake: use rust-overlay Daniël de Kok 2024-08-09 13:02:57 +0000
9f039ad4b3 flake: use rust-overlay nix/cargo-clippy Daniël de Kok 2024-08-09 13:02:57 +0000
b2b9c42724

Update documentation for Supported models (#2386) Vaibhav Srivastav 2024-08-09 15:01:34 +0200
9aea62b381

Merge branch 'huggingface:main' into vb/followup-doc-fixes Vaibhav Srivastav 2024-08-09 14:57:04 +0200
977534bcb8

flake: add fmt and clippy (#2389) Daniël de Kok 2024-08-09 14:56:20 +0200
a4b1806557

Fix clippy and fmt. Nicolas Patry 2024-08-09 14:54:52 +0200
6ee7e2e208 flake: add fmt and clippy Daniël de Kok 2024-08-09 12:53:14 +0000
c9813b935b Other minor updates. Vaibhav Srivastav 2024-08-09 14:49:04 +0200
379e1659a9

Clippy. Nicolas Patry 2024-08-09 14:39:49 +0200
d84b98b40f

Early exit on server too. Nicolas Patry 2024-08-09 12:47:39 +0200
6bcad66c6e

Using an enum for flash backens (paged/flashdecoding/flashinfer) Nicolas Patry 2024-08-09 12:31:08 +0200
952b450a3b

Using HF_HOME instead of CACHE to get token read in addition to models. (#2288) Nicolas Patry 2024-08-09 14:25:44 +0200
27daf69ea8

Merge branch 'huggingface:main' into vb/followup-doc-fixes Vaibhav Srivastav 2024-08-09 14:08:37 +0200
cd1e2cd2cf add docker load_tests Xuan Son Nguyen 2024-08-09 13:16:49 +0200
03bfff5a01 up. Vaibhav Srivastav 2024-08-09 12:35:22 +0200
c6d5039cd7

Add experimental flake (#2384) Daniël de Kok 2024-08-09 12:32:37 +0200
2bd9129f11 Minor doc fixes Vaibhav Srivastav 2024-08-09 12:29:20 +0200
5b8218fbef Add flake.nix Daniël de Kok 2024-08-09 10:22:17 +0000
7830de1566

Add FlashInfer support (#2354) Daniël de Kok 2024-08-09 11:42:00 +0200
bad8ade7ae

Using HF_HOME instead of CACHE to get token read in addition to models. Nicolas Patry 2024-07-23 15:42:55 +0000
6d06473cf4

Pr 2352 ci branch (#2382) drbh 2024-08-09 04:54:32 -0400
cb3ae30284

Update Quantization docs and minor doc fix. (#2368) Vaibhav Srivastav 2024-08-08 22:06:57 +0200
383975995b up Vaibhav Srivastav 2024-08-08 19:56:18 +0000
f852190060

fix: prefer hidden_activation over hidden_act in gemma2 (#2381) drbh 2024-08-08 14:08:56 -0400
bec657973d fix: update v3 scheduler and ensure max_batch_size > 0 drbh 2024-08-08 17:47:26 +0000
0781053d3a fix: prefer hidden_activation over hidden_act in gemma2 drbh 2024-08-08 12:58:05 -0400
2ca5980634

Pr 2337 ci branch (#2379) drbh 2024-08-08 12:30:29 -0400
6497ae61e2 Merge commit 'refs/pull/2352/head' of github.com:huggingface/text-generation-inference into pr-2352-ci-branch drbh 2024-08-08 16:27:00 +0000
689b1abbf6

fix EleutherAI/gpt-neox-20b does not work in tgi (#2346) Wang, Yi 2024-08-09 00:08:52 +0800
b921a46dc0 Merge commit 'refs/pull/2337/head' of github.com:huggingface/text-generation-inference into pr-2337-ci-branch drbh 2024-08-08 15:21:57 +0000
82d19d7723

Pr 2374 ci branch (#2378) drbh 2024-08-08 11:14:06 -0400
e1268596bc fix: syntax/style tweak drbh 2024-08-08 14:10:51 +0000
e36a9c57f0

Code expects newer huggingface_hub versions, tested and this resolves issues with streaming response format (#190) geoffrey papilion 2024-08-08 04:07:27 -0700
256a97231b

Removed redundant and crash causing regions to be a subject to Torch compile (#194) Jacek Czaja 2024-08-08 13:06:20 +0200
d7c5ef6cd2

Update __init__.py Praz 2024-08-08 13:50:02 +0530
bd4b23d0ba

Update __init__.py Praz 2024-08-08 13:43:01 +0530
a379d5536b

Fix the prefix for OPT model in opt_modelling.py #2370 (CI RUN) (#2371) drbh 2024-08-07 23:14:02 -0400
f98aaeeb27 fix: small syntax tweak drbh 2024-08-08 02:10:03 +0000
e01e1b7ca6 fix: run lints drbh 2024-08-08 01:35:42 +0000
21267f3ca3

add gptj modeling in TGI #2366 (CI RUN) (#2372) drbh 2024-08-07 21:32:37 -0400
e219397ee1 fix: adjust syntax typo again pr-2366-ci-branch drbh 2024-08-08 00:31:24 +0000
ce30a14139 fix: adjust syntax typo drbh 2024-08-08 00:03:12 +0000
7372a0dc38 fix: update docs for model addition drbh 2024-08-07 23:47:37 +0000
8094ecfc9e

fix: fix num_ln_in_parallel_attn attribute name typo in RWConfig (#2350) almersawi 2024-08-08 03:45:23 +0400