Commit Graph

  • 419ecd0167 fix qwen2_5 Mohit Sharma 2025-04-24 14:08:23 +0000
  • 8c782858bb Pre commit Nicolas Patry 2025-04-24 15:51:01 +0200
  • 3bb514ddd8 remove kwargs and redundant args Mohit Sharma 2025-04-24 13:33:22 +0000
  • d7a609d4ad Fixing the makefile by using lockfile. Nicolas Patry 2025-04-24 15:30:51 +0200
  • 90989a4a04 Put more wiggle room. Nicolas Patry 2025-04-24 14:48:47 +0200
  • 36c5ec2abe improve headdim Mohit Sharma 2025-04-24 09:55:14 +0000
  • b86a73d72b remove port Mohit Sharma 2025-04-24 09:52:17 +0000
  • 8015f5f258 Merge branch 'main' into add_vlm_chunking_optimized Mohit Sharma 2025-04-24 09:50:44 +0000
  • d58ec388bf review comments Mohit Sharma 2025-04-24 09:49:29 +0000
  • 375802948d Warmup gaudi backend (#3172) Wang, Yi 2025-04-24 15:57:08 +0800
  • 02715dc53f Add option to configure prometheus port (#3187) Mohit Sharma 2025-04-23 20:43:25 +0530
  • 67c51d7c5e Fixing format after rebase. Nicolas Patry 2025-04-23 12:23:39 +0200
  • 1cbda4f541 add port for trtllm and llamacpp Mohit Sharma 2025-04-23 10:12:22 +0000
  • 12b1cf89cf fix doc Mohit Sharma 2025-04-23 07:38:52 +0000
  • e38c296b94 add prometheus port Mohit Sharma 2025-04-22 12:44:15 +0000
  • 15926210d3 disable chunking for qwen Mohit Sharma 2025-04-23 08:09:51 +0000
  • dd91b60998 nit Mohit Sharma 2025-04-22 14:41:20 +0000
  • f1da19df41 rename vars Mohit Sharma 2025-04-22 13:54:39 +0000
  • 63ddba24b4 rename vars Mohit Sharma 2025-04-22 12:46:36 +0000
  • 136b9897d4 add prometheus port Mohit Sharma 2025-04-22 12:44:15 +0000
  • 6545cdde0d optimizations Mohit Sharma 2025-04-22 07:49:45 +0000
  • 2f67c53075 nit Mohit Sharma 2025-04-22 02:06:57 +0530
  • 26212b9f35 fix inputs_embeds Mohit Sharma 2025-04-22 02:03:34 +0530
  • f34b06ca3b nit Mohit Sharma 2025-04-22 01:58:00 +0530
  • 46ff016490 improve Mohit Sharma 2025-04-22 01:40:42 +0530
  • 6ed540b52f add improvements Mohit Sharma 2025-04-21 15:28:18 +0000
  • be8e60a918 add improvements Mohit Sharma 2025-04-21 15:25:03 +0000
  • 7237e8e6bf update pixel_values add_vlm_chunking Mohit Sharma 2025-04-19 17:12:23 +0000
  • 52e4186c2a fix idefics Mohit Sharma 2025-04-19 14:39:24 +0000
  • b86919a87a fixes Mohit Sharma 2025-04-19 10:26:56 +0000
  • 526a8785ed add encoder cache free Mohit Sharma 2025-04-18 16:00:35 +0000
  • 44ed5efbcc working Mohit Sharma 2025-04-18 14:57:37 +0000
  • 8f8819795f Fixing CI (#3184) Nicolas Patry 2025-04-18 13:07:18 +0200
  • f17367e883 Fixing CI Nicolas Patry 2025-04-18 12:48:07 +0200
  • 95ccba3705 Bump sccache to 0.10.0 (#3179) Alvaro Bartolome 2025-04-18 12:45:32 +0200
  • 92909f3f33 add logic Mohit Sharma 2025-04-18 12:37:40 +0530
  • b400c275e4 Get opentelemetry trace id from request headers instead of creating a new trace (#2648) Hyeongchan Kim 2025-04-18 16:06:41 +0900
  • 5d14a7fe3d Merge branch 'main' into feature/get-trace-id-from-req-headers Nicolas Patry 2025-04-18 09:05:56 +0200
  • 84ab88d843 Support flashinfer for Gemma3 prefill (#3167) Daniël de Kok 2025-04-17 18:07:41 +0200
  • 516b2d1c1d Pending changes exported from your codespace DIVINEDP 2025-04-17 08:08:54 +0000
  • 417c18c5cd Initial commit DIVINEDP 2025-04-17 08:08:53 +0000
  • 83e7e21b4c Rename ACTIONS_CACHE_URL to ACTIONS_RESULTS_URL Alvaro Bartolome 2025-04-16 10:51:49 +0200
  • 6620f564b6 Ensure that sccache version is 0.10.0 or higher Alvaro Bartolome 2025-04-16 10:51:30 +0200
  • a7aff220e0 Merge fd92054e1d into 4645678ff0 Curtis Ruck 2025-04-16 17:11:23 +0900
  • 01f17d526c Merge branch 'main' into warmup_gaudi_backend Wang, Yi A 2025-04-15 22:16:42 -0700
  • bf3987e25e pingpong optimization issue fix Wang, Yi A 2025-04-15 21:56:51 -0700
  • 4645678ff0 Hotfix gaudi2 with newer transformers. (#3176) Nicolas Patry 2025-04-15 12:39:28 +0200
  • cedb5f07c0 Hotfix gaudi2 with newer transformers. Nicolas Patry 2025-04-15 12:27:22 +0200
  • ad765cd06b Hotfixing gaudi deps. (#3174) Nicolas Patry 2025-04-15 11:55:28 +0200
  • 5bb27a1d6b Hotfixing gaudi deps. Nicolas Patry 2025-04-15 11:54:26 +0200
  • 16b4b7974a Upgrading the dependencies in Gaudi backend. (#3170) Nicolas Patry 2025-04-15 11:49:06 +0200
  • 7e3f072ea4 Upgrading transformers version. Nicolas Patry 2025-04-15 11:35:46 +0200
  • 459fbdebe3 transformers flash llm/vlm enabling in ipex (#3152) Wang, Yi 2025-04-15 17:08:01 +0800
  • 302c773c99 Merge 2a10a28d08 into 449cee49ca Mohit Sharma 2025-04-15 13:44:04 +0530
  • 449cee49ca setuptools <= 70.0 is vulnerable: CVE-2024-6345 (#3171) Nicolas Patry 2025-04-15 10:09:37 +0200
  • 5ec7f15d0c prefill bypass graph Wang, Yi A 2025-04-15 00:27:07 -0700
  • 6b21985c95 Merge branch 'main' into warmup_gaudi_backend Wang, Yi A 2025-04-14 18:24:34 -0700
  • 73e797528d L4 fixes (#3161) Mohit Sharma 2025-04-14 22:13:53 +0530
  • 487d0634ed setuptools <= 70.0 is vulnerable: CVE-2024-6345 Nicolas Patry 2025-04-14 17:27:39 +0200
  • fe56f760df Upgrading the python client deps (still deprecated, but used for integration-tests) Nicolas Patry 2025-04-14 17:18:43 +0200
  • 75e3ec5b84 Upgrading the dependencies in Gaudi backend. Nicolas Patry 2025-04-14 16:51:21 +0200
  • d62c941c56 Gaudi: clean cuda/rocm code in hpu backend, enable flat_hpu (#3113) Wang, Yi 2025-04-14 21:58:13 +0800
  • 74ad8ed300 ipex cpu could also support in function Wang, Yi A 2025-04-13 20:49:35 -0700
  • ce8548f5c4 softcap default -1.0 Wang, Yi A 2025-04-13 20:02:05 -0700
  • ba049c9d49 improve performance Wang, Yi A 2025-04-11 06:10:17 -0700
  • 9f0f41835f Fixed unused import Daniël de Kok 2025-04-11 18:15:21 +0000
  • c03f8d2bb1 Update Gemma3 test outputs Daniël de Kok 2025-04-11 16:05:26 +0000
  • 6652d6e6e0 Support flashinfer for Gemma3 prefill Daniël de Kok 2025-04-11 15:58:57 +0000
  • a9b26b221a launcher: ensure correct detection of Gemma 3 head size Daniël de Kok 2025-04-11 11:56:18 +0000
  • 2a10a28d08 force attn to flashdecoding add_chunked_atn Mohit Sharma 2025-04-11 15:24:12 +0000
  • a7353c35e8 fix bt Mohit Sharma 2025-04-11 15:10:19 +0000
  • d2f8caff2b support cuda graphs Mohit Sharma 2025-04-11 15:05:28 +0000
  • fd92054e1d Fix state.plan call to use positional arguments Curtis Ruck 2025-04-11 10:09:24 -0400
  • 3d71c06aff flashinfer: head_dim -> head_dim_qk flashinfer-0.2.5 Daniël de Kok 2025-04-11 12:37:21 +0000
  • e893362ad7 Update to flashinfer 0.2.5 Daniël de Kok 2025-04-11 10:24:48 +0000
  • 76cc129796 remove block_scales which is not needed anymore Wang, Yi A 2025-04-11 01:27:49 -0700
  • a83e9fe003 work with the latest vllm extension ops Wang, Yi A 2025-04-10 19:56:58 -0700
  • 4de8fb0127 Merge branch 'gaudi_backend_pa' into warmup_gaudi_backend Wang, Yi A 2025-04-10 19:42:22 -0700
  • 4cdc34ec4d match the latest vllm_extension ops Wang, Yi A 2025-04-10 19:32:32 -0700
  • 610dd200e5 Merge branch 'main' into gaudi_backend_pa Wang, Yi A 2025-04-10 18:20:28 -0700
  • cd900c3b72 pingpong optimization Wang, Yi A 2025-04-08 19:56:10 -0700
  • 3f343cdb6f reverse flash causal change Mohit Sharma 2025-04-10 15:03:44 +0000
  • 33a7ec57e2 add fix Mohit Sharma 2025-04-10 14:59:39 +0000
  • 517e4398c2 add fix Mohit Sharma 2025-04-10 13:21:11 +0000
  • 73d0876f12 Fixing the updating logic of backends. kvrouter-endpoints Nicolas Patry 2025-04-10 11:04:03 +0200
  • 18cb4a4221 Fixing add/remove/set backends. Nicolas Patry 2025-04-10 09:14:30 +0200
  • 9a8d0462e1 Fixing tokenization like https://github.com/huggingface/text-embeddin… (#3156) Nicolas Patry 2025-04-09 18:42:25 +0200
  • d93ad244a3 add attn add_chunked_attn Mohit Sharma 2025-04-09 16:37:34 +0000
  • e5618d6e40 add chunked attn support chunked_attn_l4 Pedro Cuenca 2025-04-09 16:36:06 +0000
  • 5861da1ad7 Fixing Qwen 2.5 VL (32B). (#3157) Nicolas Patry 2025-04-09 17:07:30 +0200
  • 33af4dcd6c Fixing Qwen 2.5 VL (32B). Nicolas Patry 2025-04-09 16:10:32 +0200
  • 0eb4bdc909 Fixing tokenization like https://github.com/huggingface/text-embeddings-inference/issues/525 Nicolas Patry 2025-04-09 15:22:49 +0200
  • a2d2406ddd Update repo links Inferentia refer HF docs Guspan Tanadi 2025-04-09 14:23:51 +0700
  • f8c8c3d397 softcap default -1.0 Wang, Yi A 2025-04-08 22:42:03 -0700
  • 8d36856d57 install xelink lib Wang, Yi A 2025-04-08 20:42:28 -0700
  • 50282e3cc1 transformers flash llm/vlm enabling in xpu Wang, Yi A 2025-04-08 18:36:28 -0700
  • a1f3ebe17c Release 3.2.3 v3.2.3 git_v3.2.3 Nicolas Patry 2025-04-08 10:17:51 +0200
  • 0b28aabb94 3.2.3 (#3151) Nicolas Patry 2025-04-08 10:16:37 +0200
  • 68df6b19e4 3.2.3 Nicolas Patry 2025-04-08 10:14:55 +0200
  • 5831ff6e69 remove useless rwlock Corentin REGAL 2025-04-08 09:40:45 +0200