Commit Graph

  • b094f026c1 chore(neuron): bump version to 0.2.0 David Corvoysier 2025-05-22 14:35:18 +0000
  • c065c58818 Remove Optimum-habana yuanwu 2025-06-10 05:09:56 +0000
  • 2204f91f32 fix: adjust llava logic and bump snaps support-granite-vision drbh 2025-06-06 14:54:10 +0000
  • c43954d44c fix multi-modality apply template issue Wang, Yi A 2025-06-04 20:33:53 -0700
  • d5ba5f54f6 Use the max_position_embeddings yuanwu 2025-06-06 07:22:40 +0000
  • 1505d4687a Remove useless modifications yuanwu 2025-06-06 07:06:19 +0000
  • 4a89f59ec7 Remove useless modification yuanwu 2025-06-06 06:46:06 +0000
  • eed58b77c3 Remove debug info yuanwu 2025-06-06 06:17:45 +0000
  • dbb24255c3 fix multi-modality concatenate Wang, Yi A 2025-06-05 23:14:15 -0700
  • 7f346a88e3 Fix the crash issue of Qwen/Qwen3-235B-A22B yuanwu 2025-06-06 06:14:01 +0000
  • acc02aeb3e set block mapping inside model graph Wang, Yi A 2025-06-03 23:49:29 -0700
  • 30bdf922bd feat: improve llava next pooling for granite vision drbh 2025-06-04 13:50:39 +0000
  • 1a5ef906ae Remove debug info yuanwu 2025-06-03 05:28:38 +0000
  • 8b9a503f8a Move the _update_cos_sin_cache into get_cos_sin yuanwu 2025-06-04 03:00:23 +0000
  • 79ee5135e3 remove unnecessage input_id pad Wang, Yi A 2025-06-02 23:47:23 -0700
  • 1ff9d185d5 Remove useless packages (#3253) Yuan Wu 2025-06-03 19:42:29 +0800
  • 151d6638d3 avoid reshape of all_input_ids_tensor Wang, Yi A 2025-06-02 22:17:31 -0700
  • d2e6e863a4 Remove useless packages yuanwu 2025-05-30 03:21:16 +0000
  • 8e41da951d Release 3.3.2 v3.3.2 git_3.3.2 Daniël de Kok 2025-05-30 14:19:18 +0000
  • 249189d96e Prepare for 3.3.2 (#3249) Daniël de Kok 2025-05-30 16:16:36 +0200
  • 7063adf2f5 Prepare for 3.3.2 Daniël de Kok 2025-05-30 11:18:50 +0000
  • 97f305b28f Merge 1cb904e619 into 6b6e30a6f6 Jim Burtoft 2025-05-29 17:11:32 +0200
  • 6b6e30a6f6 [gaudi] Fix the Llama-4-Maverick-17B-128E crash issue (#3246) Yuan Wu 2025-05-29 17:38:44 +0800
  • b1b79bf32d Fix the Llama-4-Maverick-17B-128E crash issue yuanwu 2025-05-29 08:37:25 +0000
  • 70217ac345 [Gaudi] Fix the OOM issue of Llama-4-Scout-17B-16E-Instruct (#3245) Yuan Wu 2025-05-29 15:58:24 +0800
  • fb104d8b42 Fix the OOM issue of Llama-4-Scout-17B-16E-Instruct yuanwu 2025-05-29 06:38:45 +0000
  • 5155fef477 Merge branch 'main' into qwen3_moe Yuan Wu 2025-05-29 13:05:31 +0800
  • f14044009a fp8 compressed tensors w8a8 support for Gaudi backend (#3242) Wang, Yi 2025-05-28 20:54:20 +0800
  • f147f10ed4 remove install of ipex Wang, Yi A 2025-05-27 22:38:02 -0700
  • 475f6e21bc add multi-weight for GPTQ weight loader Wang, Yi A 2025-05-26 23:21:59 -0700
  • e72d2574c8 use xccl Wang, Yi A 2025-05-26 20:22:03 -0700
  • ce8978f9ea remove print Wang, Yi A 2025-05-25 18:56:41 -0700
  • a2934644b8 Merge branch 'main' into fp8_compressor Wang, Yi A 2025-05-25 18:55:35 -0700
  • fab395b41f perf(trtllm): reduce futile loop iterations Tzu-Yu Lee 2025-05-25 22:07:54 +0800
  • f7bd82a90e feat(trtllm): get more accurate start time Tzu-Yu Lee 2025-05-25 17:40:45 +0800
  • 41819d70f7 fix(trtllm): fix do_sample being ignored Tzu-Yu Lee 2025-05-18 18:22:02 +0800
  • 4ffa111fb0 fp8 compressed_tensors w8a8 support Wang, Yi A 2025-05-22 21:48:04 -0700
  • 1883a62a94 Add Qwen3 for Gaudi backend (#3229) Yuan Wu 2025-05-23 14:58:35 +0800
  • 45d95bdccc Merge branch 'huggingface:main' into qwen3_moe Yuan Wu 2025-05-23 10:26:57 +0800
  • cc3f6127ef Remove debug modification yuanwu 2025-05-22 18:53:49 +0300
  • 5e1d1bf174 Cannot use the latest transformers yuanwu 2025-05-22 18:09:31 +0300
  • f58d7cf50e Nix: switch to hf-nix (#3240) Daniël de Kok 2025-05-22 17:09:15 +0200
  • 5aafd37d7b Remove outdated local overrides Daniël de Kok 2025-05-22 14:27:55 +0000
  • 1ccf86ce84 Use the 4.52.2 transformers yuanwu 2025-05-22 17:01:07 +0300
  • 6c1d9f1377 Nix: switch to hf-nix Daniël de Kok 2025-05-22 13:34:39 +0000
  • f08b44ade5 Upgrade to new vllm extension ops for Gaudi backend (fix issue in exponential bucketing) (#3239) Wang, Yi 2025-05-22 21:29:16 +0800
  • abaa99ebaa upgrade to new vllm extension ops(fix issue in exponential bucketing) Wang, Yi A 2025-05-22 01:23:04 -0700
  • 767a65202d Release 3.3.1 v3.3.1 git_3.3.1 Daniël de Kok 2025-05-22 07:47:12 +0000
  • 674c514d44 Prepare for 3.3.1 (#3238) Daniël de Kok 2025-05-22 09:43:55 +0200
  • 9e7e546923 Move input_ids to hpu and remove disposal of adapter_meta (#3237) Wang, Yi 2025-05-22 15:21:31 +0800
  • 346b6f7219 Use the latest transformers yuanwu 2025-05-22 05:45:45 +0300
  • 2e8d3e91ea Add mark_step into llama4 yuanwu 2025-05-22 07:20:21 +0300
  • ad41abd68c Add mark_step into qwen3 yuanwu 2025-05-22 07:17:49 +0300
  • 3d20c79007 Merge branch 'huggingface:main' into qwen3 Yuan Wu 2025-05-22 11:56:29 +0800
  • 3338b34ba4 lora enable in xpu Wang, Yi A 2025-05-21 18:24:04 -0700
  • c20d0827db Prepare for 3.3.1 Daniël de Kok 2025-05-21 13:55:55 +0000
  • e32528792c Switch to punica-sgmv kernel from the Hub (#3236) Daniël de Kok 2025-05-21 15:44:15 +0200
  • 1495616b8b nix: client depends on aiohttp Daniël de Kok 2025-05-21 09:54:05 +0000
  • 40a4f9b5ea Switch to punica-sgmv kernel from the Hub Daniël de Kok 2025-05-21 08:31:00 +0000
  • b7ab3d3da7 move input_ids to hpu and remove disposal of adapter_meta Wang, Yi A 2025-05-20 23:28:46 -0700
  • 96535e8be8 Merge 70c616ca27 into 43b1b07fb9 drbh 2025-05-21 06:04:21 +0200
  • 43b1b07fb9 Fix the crash in default ATTENTION path for Gaudi backend (#3235) Wang, Yi 2025-05-20 20:02:32 +0800
  • 8209cb90b2 fix the crash in default ATTENTION path Wang, Yi A 2025-05-20 04:41:11 -0700
  • 000e313a92 Refine warmup and upgrade to synapse AI 1.21.0 (#3234) Wang, Yi 2025-05-20 16:22:43 +0800
  • 2a014786e4 Remove debug log yuanwu 2025-05-20 02:31:36 +0000
  • 05b6ed1bff Fix num_key_value_heads issue yuanwu 2025-05-20 02:29:12 +0000
  • a5e889d037 update to 1.21 Wang, Yi A 2025-05-19 18:01:04 -0700
  • ae0c9dfb62 enable VLLM_EXPONENTIAL_BUCKETING Wang, Yi A 2025-05-18 19:56:11 -0700
  • 550c85c39e refine warm up Wang, Yi A 2025-05-17 02:37:43 -0700
  • d658b5def3 Deepseek R1 for Gaudi backend (#3211) Wang, Yi 2025-05-19 22:36:39 +0800
  • b32b78e74e Fix crash issue yuanwu 2025-05-19 01:39:48 +0000
  • 8275bdcfe9 Fix? gaudi_llama4_tmp regisss 2025-05-18 22:04:41 +0000
  • c18766afec allocate from 1 block in router Wang, Yi A 2025-05-18 06:42:11 -0700
  • 56dd0a09e6 feat(trtllm): check existence of config files Tzu-Yu Lee 2025-05-18 03:25:13 +0800
  • 987337bf31 feat(trtllm): catch broader exception Tzu-Yu Lee 2025-05-18 02:49:35 +0800
  • 27d03309c9 feat(trtllm): add stop sequence support Tzu-Yu Lee 2025-05-18 02:37:19 +0800
  • 0858af206f fix(trtllm): fix segfault when canceling request Tzu-Yu Lee 2025-05-18 02:22:53 +0800
  • cc4b5848b9 fix: fix prometheus_port CLI short arg conflict Tzu-Yu Lee 2025-05-13 00:05:56 +0800
  • c458d21d07 feat(trtllm): add new finish reasons Tzu-Yu Lee 2025-05-11 03:27:59 +0800
  • 58934c8b61 fix: count gpu uuids if NVIDIA_VISIBLE_DEVICES env set to all (#3230) drbh 2025-05-16 11:48:58 -0400
  • 80b43a9974 fix: count gpu uuids if NVIDIA_VISIBLE_DEVICES env set to all drbh 2025-05-16 15:01:39 +0000
  • becf36f5e4 fmt Wang, Yi A 2025-05-16 06:07:56 -0700
  • b5e1ae9209 minor fix Wang, Yi A 2025-05-15 23:34:48 -0700
  • a184ce3876 mixtral moe fix after upgrade vllm extension ops git Wang, Yi A 2025-05-15 19:22:57 -0700
  • 8c182415c2 Enable the qwen3 MOE yuanwu 2025-05-16 01:40:22 +0000
  • 638714f964 Add Qwen3 yuanwu 2025-05-13 07:42:22 +0000
  • d704b0c852 Add Qwen3 yuanwu 2025-05-13 07:42:22 +0000
  • f93ed958e4 Update Transformers requirement regisss 2025-05-15 20:04:26 +0000
  • 18cbecfb38 Enable Llama4 for Gaudi backend (#3223) Yuan Wu 2025-05-15 20:35:37 +0800
  • 7e531f413d Update to Torch 2.7.0 (#3221) Daniël de Kok 2025-05-15 11:48:33 +0200
  • 9281be20c0 accelerate warmup Wang, Yi A 2025-05-14 19:16:00 -0700
  • d859dd36b7 Fixup mllama Daniël de Kok 2025-05-14 13:54:06 +0000
  • c9b6478b14 Mamba too Daniël de Kok 2025-05-14 13:17:47 +0000
  • 74ded00ecb Attempt again to sync with CI Daniël de Kok 2025-05-14 09:58:34 +0000
  • b2bd163d19 Mla deepspeek (#2) Wang, Yi 2025-05-13 22:42:46 +0800
  • 6d4e98dae3 Fix some test outputs with slight deviations Daniël de Kok 2025-05-13 09:04:50 +0000
  • e14a451d8d Add the latest transformers yuanwu 2025-05-13 01:38:18 +0000
  • 4128953df3 Merge 0cd6ff7a3d into 535ce23827 omahs 2025-05-12 09:40:49 -0300
  • c264a42aa1 adjust the round_up_seq logit to align with prefill warmup phase on HPU Liu, Kaixuan 2025-05-12 07:21:33 -0400
  • f7b7d435bf Pin click to fix incompatibility with typer Daniël de Kok 2025-05-12 11:00:21 +0000
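A listing in the "hash subject author date" layout above can be produced with `git log --graph` and a custom pretty format. A minimal sketch, run against a throwaway repository so it is self-contained (the temp directory, user name, and commit message are illustrative, not from the history above):

```shell
# Create a disposable repo with one empty commit, then print its log
# in a layout similar to the commit graph above.
set -e
tmp=$(mktemp -d)
cd "$tmp"
git init -q
git -c user.name="Example User" -c user.email="user@example.com" \
    commit -q --allow-empty -m "chore: initial commit"

# %h = abbreviated hash, %s = subject, %an = author name, %ad = author date
git log --graph --pretty=format:'%h %s %an %ad' --date=iso
```

In a real checkout, pointing the same `git log` invocation at the branch in question (and dropping the throwaway-repo setup) yields output matching the list above, with merge commits drawn as extra graph lanes.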