text-generation-inference

huggingface/text-generation-inference

Fork 0

mirror of https://github.com/huggingface/text-generation-inference.git synced 2025-06-23 17:40:17 +00:00

Commit Graph

Select branches

Hide Pull Requests

add-chat-response-format

add-google-cloud-provider

add-quickstart-script

add-rotary-embed-tests

add-small-ttft-script

add-test-for-warmup-and-kvcache

add_L4

add_api_key

add_batch_dimension

add_chunked_atn

add_chunked_attn

add_deepseekv3

add_gptq_docs

add_integration_test

add_readme_dashboard

add_tunable_prefill

add_vlm_chunking

adding_docs

adjust-mllama-test-output

adjust-where-request-max-tokens-is-defaulted

aiter_kernels

amd-ci-fx

auto_length

automodel-supports-flash-paged-attention

avoid-cuda-graph-during-warmup-if-oom

avoid-zero-seed

backends/trtllm

backends/trtllm-executor

baichuan2-13b

bnb4

bugfix/add_tools_prompt

bugfix/moe-kernels-imports

bugfix/phi-exl2

bump-client-0.6.2

bump-kernel-versions

bump-poetry-and-requirements

chunked_attn_l4

ci-amihalik-update-chat-completion-messages

ci-new-cluster

ci-patch

ci-run-openai-function-calling-compatible-support

ci-update_xpu_image

ci-xpu

ci-xpu2

ci2

ci_amd

ci_amd2

ci_amd3

ci_amd4

close_dl_thread

compat_logger

cuda_ipc_allreduce

debug-gpt2

debug-request-id

debug-torch-23

debug/gemma2

debugging-timeouts

deploy/aml

dev

development-guide

dummy

enable-non-grammar-constrained-tools

enable-qwen2vl-video

enable-transformers-vlm

enable_non_divisible_embeddings

exl2

experiment/moe

explore-static-triton-kernels

explore-t4-gemma-issues

feat-backend-llamacpp

feat/add-load-test

feat/attention_sinks

feat/backend_abstraction

feat/backend_feature

feat/better_tokens

feat/cuda_12

feat/flash_decoding

feat/improve_max_tokens

feat/max_queue_size

feat/page_re_alloc

feat/parse_logs

feat/support_deepspeed

feature/machete

feature/moe-kernels

feature/no_repeat_ngram_size

feature/no_repeat_ngram_size_ci

feature/phi-3-small

feature/prefix

feature/radix-prefix-cache

feature/radix-prefix-cache-bench

feature/vlm-prefix-caching

fix-cudagraph-bug

fix-gemma-tokenization

fix-grammar-cleanup-bug

fix-grammar-fsm-batching

fix-mixtral-adapter-loading

fix-release-tests

fix-repack-for-marlin

fix-tp

fix-version-install

fix/allow-top-p-0

fix/avoid_record_streams

fix/op-trace-id

fix/parse-mamba-config

fix_default_arg

fix_exl2

fix_fp8_llama3.2

fix_leak

fix_mistral2

fix_neox_rotary_emb

fix_phi3

flashinfer

flashinfer-0.2.5

fp8_kvcache

fp8_rocm

gaudi/add-ci

gaudi_llama4_tmp

gha_sccache_use_secrets

git_2.0.4

git_3.1.1

git_3.2.0

git_3.2.1

git_3.3.0

git_3.3.1

git_3.3.2

git_v2.1.0

git_v2.1.1

git_v2.2.0

git_v2.3.0

git_v2.3.1

git_v2.4.0

git_v2.4.1

git_v3.0.0

git_v3.0.1

git_v3.0.2

git_v3.1.0

git_v3.2.2

git_v3.2.3

git_v3.3.3

git_v3.3.4

improve-docs

improve-dynamic-message-content

improve-tool-call-and-response-ids

improve_defaults

improve_launcher_defaults

inlcude-latest-release-on-commit-builds-tags

ipex-moe

kvrouter

kvrouter-endpoints

llama-fused-compiled-mlp

main

maintenance/docker-network

maintenance/merge-vlm-input-prep

mamba2

martinigoyanes-fix-frequency-penalty

medusa

megatron

message-more-info

mi300-temp

mllama

model_compat_log

more_logs

multi-lora

new_minor_version

nix/cargo-clippy

nix/docker2

nix/pytorch-2.5.1

nix_integration_tests

nix_test2

no_root_user

no_root_user2

op-compilation-benchmarking

origin/slind_window_fix

osanseviero-patch-1

pip-installable

pr-1869-ci-run

pr-2076-ci-run

pr-2290-ci-runner

pr-2366-ci-branch

pr-2444-ci-branch

pr-2517-ci-branch

pr-2711-ci-branch

pr-2784-ci-branch

pr-2840-ci-branch

pr-2954-ci-branch

pr-3002-ci-branch

pr-3004-ci-branch

pr-3018-ci-branch

precompile-kernels-workflow

prefix_chunk

prefix_default

proxy_sse_engine_state

quantization

refactor-lora-linear

release-3.2.4

remove_post_load_weights

response-header-metrics

revert

rocm-ci-build

rocm_6.2_fixes

router-grammar-compile

s3-cache

self-generating-docs

set-num-blocks

simpler_exllama

skip-mistral-test

speculative

streaming_conceptual

support-granite-vision

support-logit-bias-in-chat

support-phi-model

support-phi3-small

support-pre-compile-kernels

temp_work

test-batch-speedup-amount

test_docs

test_rocm

tmp_invariants

tmp_medusa

tmp_torch_compile

triton_fix

trtllm-stop-words

trtllm/executor_stats

tuna

update-jsonschema

update_docs2

update_internal_version

update_peft

update_readme

upgrade-outlines

upgrade_mlp_speculator

use_g6

use_updated_kernels

vllm/setup

zstd

#1

#100

#101

#1010

#1018

#1019

#102

#1022

#1023

#1024

#103

#1033

#1034

#1042

#1044

#1045

#1048

#1049

#1052

#1054

#1058

#1059

#106

#1060

#1061

#1063

#1064

#1065

#1066

#1068

#107

#1070

#1071

#1075

#1076

#1077

#108

#1080

#1081

#1089

#109

#1090

#1091

#1092

#1094

#1096

#1097

#1099

#11

#110

#1100

#1101

#1102

#1103

#1105

#1110

#1112

#1116

#1123

#1128

#1134

#114

#1140

#1141

#115

#1153

#1155

#116

#1165

#1165

#117

#1173

#1176

#1178

#1179

#118

#1182

#1183

#1184

#1187

#119

#1198

#1202

#1211

#1214

#1219

#122

#1224

#1228

#123

#1239

#1241

#1242

#1243

#1246

#1252

#126

#1260

#1267

#1270

#1272

#1274

#1276

#1279

#128

#1285

#1287

#129

#1294

#1295

#13

#130

#1301

#1305

#1307

#1308

#1313

#132

#1326

#1328

#133

#1336

#1337

#134

#1341

#1343

#1346

#1347

#1348

#135

#1351

#1352

#1353

#1358

#136

#1361

#1364

#137

#1370

#1373

#138

#1381

#1386

#139

#1390

#1395

#14

#140

#1408

#141

#1414

#1419

#142

#1420

#1424

#1425

#1427

#1428

#143

#1436

#144

#1442

#1448

#145

#1450

#1453

#1454

#1455

#1459

#1461

#1462

#1463

#1469

#147

#1470

#1471

#1473

#1475

#1476

#1477

#1478

#148

#1480

#1484

#1486

#1488

#1489

#149

#1490

#1491

#1492

#1494

#1495

#1496

#1497

#1498

#15

#150

#1502

#1504

#1505

#1506

#151

#1511

#1512

#1514

#1515

#1516

#1517

#1518

#152

#1520

#1523

#1524

#1526

#1527

#153

#1532

#1533

#1537

#1539

#154

#1540

#1541

#1542

#1543

#155

#1550

#1552

#1555

#1556

#1557

#1560

#1563

#1564

#1567

#1568

#1569

#1570

#1571

#1576

#1577

#1578

#1579

#1580

#1583

#1584

#1585

#1586

#1587

#1588

#159

#1591

#1592

#1594

#16

#160

#1603

#1605

#1606

#1607

#1608

#1609

#161

#1610

#1614

#1617

#1618

#1619

#162

#1621

#1626

#1628

#163

#1632

#1637

#1638

#1639

#164

#1646

#1648

#1650

#1651

#1653

#1658

#1660

#1662

#1663

#1664

#1666

#1667

#1668

#167

#1676

#168

#1682

#1685

#1686

#1693

#1697

#1698

#17

#170

#1702

#1703

#1704

#1707

#1708

#1709

#1710

#1713

#1714

#1715

#1716

#1718

#1719

#1726

#1727

#1729

#173

#1730

#1731

#1734

#1735

#1736

#1737

#1739

#174

#1740

#1747

#1748

#1749

#175

#1751

#1755

#1756

#1758

#1759

#1760

#1764

#1765

#1767

#1768

#1773

#1774

#1775

#178

#1784

#1789

#179

#1790

#1791

#1797

#1798

#18

#180

#1800

#1801

#1808

#181

#1811

#1812

#1813

#1815

#1816

#1817

#1818

#1820

#1825

#1827

#1828

#1829

#183

#1830

#1832

#1833

#1835

#1836

#1839

#184

#1840

#1841

#1843

#1844

#1845

#1848

#1849

#185

#1850

#1851

#1854

#1855

#186

#1860

#1865

#1866

#1869

#187

#1882

#1884

#1886

#1888

#1889

#1890

#1892

#1894

#1895

#1898

#19

#190

#1902

#1906

#1908

#1909

#191

#1910

#1912

#1915

#1916

#1917

#1918

#1919

#1920

#1921

#1923

#1924

#1925

#1929

#193

#1931

#1932

#1934

#1935

#1936

#1937

#1938

#1939

#194

#1940

#1942

#1947

#1948

#1949

#1950

#1951

#1953

#1954

#1958

#1959

#196

#1963

#1965

#1967

#1970

#1971

#1975

#1980

#1981

#1985

#1986

#1988

#1989

#1990

#1994

#1995

#1996

#2

#20

#200

#2002

#2003

#2004

#2005

#2006

#2008

#201

#2010

#2011

#2013

#2014

#2015

#2016

#2017

#202

#2020

#2021

#2022

#2023

#2024

#2028

#203

#2031

#2032

#2033

#2034

#2038

#2039

#2044

#2045

#2046

#2047

#2049

#205

#2050

#2052

#2054

#2059

#2060

#2061

#2062

#2063

#2065

#2066

#2068

#207

#2071

#2072

#2074

#2075

#2076

#2078

#2079

#208

#2080

#2084

#2085

#2086

#2087

#2088

#2089

#2091

#2092

#2094

#2095

#2097

#2099

#210

#2101

#2102

#2103

#2104

#2105

#2109

#2110

#2111

#2114

#2115

#2116

#2117

#2118

#2119

#212

#2120

#2123

#2124

#2125

#2127

#2128

#2129

#213

#2131

#2132

#2133

#2134

#2135

#2137

#2138

#214

#2140

#2141

#2142

#2148

#2149

#215

#2151

#2152

#2153

#2155

#2156

#2158

#216

#2161

#2163

#2164

#2165

#2166

#2167

#2168

#2169

#217

#2170

#2173

#2175

#2176

#2178

#2179

#218

#2180

#2181

#2182

#2186

#2187

#2189

#219

#2190

#2191

#2193

#2194

#2196

#22

#220

#2201

#2202

#2203

#2204

#2205

#2208

#2209

#221

#2212

#2213

#2215

#2216

#2217

#222

#2220

#2221

#2224

#2225

#2226

#2228

#2230

#2231

#2232

#2233

#2237

#2242

#2243

#2244

#2245

#2248

#2249

#2250

#2251

#2254

#2255

#2256

#2257

#2258

#2259

#226

#2260

#2261

#2262

#2266

#2268

#2269

#227

#2271

#2272

#2273

#2274

#2276

#2277

#2278

#2279

#228

#2281

#2282

#2283

#2284

#2285

#2286

#2287

#2288

#2289

#2290

#2291

#2292

#2298

#2299

#23

#2300

#2303

#2304

#2306

#2307

#2308

#2309

#2311

#2311

#2313

#2314

#2315

#2317

#2320

#2323

#2325

#2326

#2327

#2328

#2329

#233

#2330

#2331

#2333

#2335

#2336

#2337

#2338

#2339

#234

#2341

#2342

#2343

#2344

#2345

#2346

#2347

#235

#2350

#2351

#2352

#2353

#2354

#2357

#2358

#2359

#2360

#2361

#2364

#2365

#2366

#2367

#2368

#237

#2370

#2371

#2372

#2374

#2377

#2378

#2379

#2381

#2382

#2384

#2385

#2386

#2387

#2389

#2390

#2391

#2392

#2394

#2395

#2396

#2397

#2398

#2399

#24

#2401

#2402

#2403

#2404

#2405

#2406

#2407

#2408

#2410

#2411

#2412

#2414

#2415

#2416

#2419

#242

#2420

#2422

#2423

#2424

#2426

#2427

#2428

#2429

#2430

#2431

#2433

#2437

#2438

#2439

#244

#2442

#2443

#2444

#2449

#2450

#2451

#2453

#2454

#2455

#2459

#246

#2462

#2463

#2468

#2469

#2470

#2471

#2472

#2473

#2477

#2478

#2479

#248

#2481

#2482

#2484

#2486

#2489

#2490

#2491

#2492

#2493

#2494

#2496

#2497

#2498

#2499

#25

#250

#2500

#2501

#2507

#251

#2510

#2511

#2512

#2513

#2514

#2515

#2516

#2517

#2518

#2519

#252

#2520

#2521

#2524

#2525

#2527

#2528

#2529

#2532

#2533

#2535

#2536

#2537

#2538

#2539

#2540

#2545

#2546

#2547

#2548

#255

#2550

#2551

#2552

#2553

#2554

#2555

#2556

#2557

#2558

#2561

#2562

#2563

#2566

#2567

#2568

#257

#2574

#2575

#2577

#2578

#2579

#258

#2580

#2582

#2585

#2586

#2587

#2588

#2589

#259

#2590

#2591

#2592

#2594

#2595

#2596

#2597

#2599

#26

#2600

#2601

#2602

#2603

#2604

#2605

#2606

#2607

#2609

#261

#2610

#2611

#2612

#2614

#2616

#2617

#2619

#262

#2620

#2622

#2623

#2625

#2627

#2628

#2629

#2630

#2631

#2632

#2633

#2634

#2637

#264

#2640

#2642

#2642

#2645

#2646

#2647

#2648

#2650

#2651

#2652

#2655

#2658

#2659

#266

#2661

#2663

#2664

#2665

#2666

#2668

#267

#2673

#2674

#2677

#2678

#2680

#2682

#2683

#2684

#2685

#2686

#2687

#2688

#2689

#269

#2690

#2691

#2692

#2693

#2694

#2695

#2697

#2698

#2699

#27

#2701

#2702

#2704

#2706

#2707

#2708

#2709

#2710

#2711

#2712

#2713

#2714

#2716

#2717

#2718

#2719

#272

#2720

#2721

#2723

#2724

#2725

#2726

#2732

#2733

#2738

#274

#2740

#2741

#2742

#2743

#2745

#2746

#275

#2750

#2751

#2753

#2754

#2755

#2756

#2756

#2758

#276

#2760

#2761

#2762

#2764

#2765

#2766

#2767

#2768

#2769

#277

#2770

#2771

#2772

#2773

#2774

#2777

#2778

#2779

#278

#2782

#2784

#2785

#2786

#2788

#2789

#2790

#2791

#2792

#2793

#2795

#2796

#2797

#2798

#2799

#28

#2802

#2803

#2806

#2807

#2808

#2809

#2810

#2811

#2812

#2814

#2815

#2816

#2817

#2818

#282

#2824

#2825

#2826

#2827

#2829

#2831

#2833

#2834

#2837

#284

#2840

#2841

#2845

#2846

#2848

#2848

#2849

#285

#2850

#2852

#2855

#2856

#2858

#2859

#286

#2861

#2862

#2863

#2866

#2866

#287

#2870

#2874

#2874

#2878

#2882

#2883

#2884

#2885

#2886

#2891

#2892

#2893

#2898

#29

#2901

#2903

#2904

#2905

#2907

#2908

#2910

#2911

#2912

#2913

#2916

#2917

#2918

#2919

#292

#2921

#2922

#2924

#2928

#2929

#2931

#2932

#2933

#2935

#2936

#2936

#2937

#2938

#294

#2940

#2941

#2942

#2943

#2945

#2947

#2948

#2949

#2950

#2951

#2953

#2954

#2957

#2958

#2960

#2961

#2962

#2963

#2964

#2964

#2965

#2965

#2966

#2967

#2968

#297

#2970

#2971

#2972

#2975

#2976

#2976

#2977

#2979

#298

#2980

#2981

#2982

#2983

#2988

#299

#2990

#2991

#2992

#2993

#2995

#2996

#2998

#2999

#30

#3000

#3001

#3002

#3003

#3004

#3006

#3007

#3008

#3010

#3012

#3012

#3013

#3015

#3016

#3017

#3018

#302

#3021

#3022

#3024

#3027

#3028

#3029

#303

#3030

#3031

#3032

#3033

#3034

#3034

#3035

#3036

#3037

#3039

#304

#3040

#3041

#3042

#3044

#3045

#3046

#3047

#3048

#3049

#305

#3051

#3053

#3054

#3055

#3056

#3057

#3060

#3061

#3063

#3063

#3064

#3064

#3065

#3065

#3067

#3068

#3069

#3072

#3073

#3074

#3075

#3076

#3077

#3078

#3079

#3079

#308

#3080

#3081

#3083

#3084

#3085

#3086

#3089

#3091

#3092

#3093

#3094

#3095

#3096

#3098

#3099

#31

#310

#3100

#3101

#3103

#3106

#3107

#3108

#3109

#3111

#3111

#3112

#3113

#3116

#3117

#3118

#3120

#3121

#3122

#3124

#3125

#3126

#3127

#3128

#3129

#313

#3131

#3132

#3134

#3136

#3141

#3142

#3144

#3145

#3147

#3148

#3150

#3151

#3152

#3154

#3154

#3156

#3157

#3160

#3160

#3161

#3162

#3162

#3164

#3164

#3166

#3166

#3167

#317

#3170

#3171

#3172

#3174

#3176

#3179

#318

#3182

#3184

#3186

#3186

#3187

#3188

#3189

#3193

#3194

#3194

#3196

#3197

#3197

#32

#3200

#3201

#3201

#3202

#3204

#3206

#3207

#3210

#3210

#3211

#3217

#3218

#3220

#3221

#3222

#3223

#3224

#3229

#3230

#3231

#3231

#3232

#3232

#3234

#3235

#3236

#3237

#3238

#3239

#3240

#3242

#3244

#3245

#3246

#3249

#325

#3252

#3252

#3253

#3254

#3255

#3255

#3256

#3258

#3258

#3260

#3261

#3262

#3262

#3263

#3264

#3265

#3266

#3266

#3267

#3267

#3268

#3269

#327

#3270

#3271

#3273

#3274

#328

#329

#33

#334

#335

#336

#34

#340

#341

#343

#344

#348

#35

#351

#352

#353

#356

#357

#358

#359

#36

#360

#362

#363

#364

#367

#368

#37

#370

#373

#379

#384

#385

#388

#39

#393

#394

#395

#396

#4

#40

#400

#404

#406

#407

#41

#411

#412

#42

#434

#438

#44

#441

#443

#45

#453

#46

#462

#465

#47

#470

#472

#475

#477

#48

#480

#483

#485

#488

#49

#498

#5

#50

#501

#502

#51

#513

#514

#516

#519

#52

#520

#521

#522

#525

#529

#53

#534

#54

#543

#544

#545

#55

#550

#553

#557

#558

#56

#561

#562

#567

#57

#575

#578

#579

#58

#580

#581

#582

#583

#585

#586

#587

#588

#59

#590

#595

#596

#6

#60

#600

#605

#608

#609

#61

#611

#616

#617

#618

#619

#62

#621

#623

#624

#626

#63

#630

#633

#634

#635

#639

#64

#642

#643

#647

#648

#659

#66

#661

#664

#665

#666

#67

#670

#671

#678

#68

#684

#689

#698

#7

#70

#704

#708

#71

#712

#713

#715

#719

#72

#721

#723

#725

#727

#73

#733

#737

#738

#740

#741

#743

#745

#746

#748

#75

#750

#76

#761

#762

#767

#768

#770

#773

#783

#785

#789

#791

#793

#794

#795

#797

#798

#799

#8

#803

#805

#806

#809

#810

#812

#82

#820

#821

#822

#823

#829

#831

#836

#838

#84

#842

#848

#85

#851

#852

#853

#854

#858

#86

#860

#862

#867

#868

#87

#872

#88

#881

#884

#886

#889

#89

#892

#893

#898

#9

#90

#900

#901

#905

#906

#91

#910

#911

#918

#921

#93

#930

#932

#935

#94

#941

#947

#95

#950

#951

#953

#954

#957

#958

#96

#963

#964

#966

#968

#97

#971

#977

#981

#986

#989

#990

#993

#994

#999

v0.2.0

v0.2.1

v0.3.0

v0.3.1

v0.3.2

v0.4.0

v0.4.1

v0.4.2

v0.4.3

v0.5.0

v0.6.0

v0.7.0

v0.8.0

v0.8.1

v0.8.2

v0.9.0

v0.9.1

v0.9.2

v0.9.3

v0.9.4

v1.0.0

v1.0.1

v1.0.2

v1.0.3

v1.1.0

v1.1.1

v1.2.0

v1.3.0

v1.3.1

v1.3.2

v1.3.3

v1.3.4

v1.4.0

v1.4.1

v1.4.2

v1.4.3

v1.4.4

v1.4.5

v2.0.0

v2.0.1

v2.0.2

v2.0.3

v2.0.4

v2.1.0

v2.1.1

v2.2.0

v2.3.0

v2.3.1

v2.4.0

v2.4.1

v3.0.0

v3.0.1

v3.0.2

v3.1.0

v3.1.1

v3.2.0

v3.2.1

v3.2.2

v3.2.3

v3.3.0

v3.3.1

v3.3.2

v3.3.3

v3.3.4

737b1d8369

Merge ae7f3aeba1 into 719907410b Baptiste Colle 2025-06-23 12:27:39 +0000
ae7f3aeba1 update conftest gaudi/add-ci baptiste 2025-06-23 12:27:32 +0000
a32025f931 fix style baptiste 2025-06-23 12:26:06 +0000
0295bf243f fix broken test baptiste 2025-06-23 12:10:14 +0000
cd5c51974e

Merge f384e0f308 into 719907410b Wang, Yi 2025-06-23 19:46:52 +0800
1f03afe94d enable multi-card test baptiste 2025-05-21 15:28:58 +0000
8768085c8c add new gaudi3 runners baptiste 2025-05-21 11:27:11 +0000
9c235f4d66 feat(gaudi/ci): added ci for gaudi device baptiste 2025-04-22 09:17:44 +0000
fcf6870d20 testing baptiste 2025-04-22 08:43:45 +0000
59dc8c2699 change defualt behaviour to only run a subset of all the models baptiste 2025-04-22 08:16:17 +0000
9c6776375e change defualt behaviour to only run a subset of all the models baptiste 2025-04-22 08:15:11 +0000
a2a5772cd7 wip(ci): debug the ci Baptiste Colle 2025-04-10 17:17:16 +0200
4b5e812fe1 wip(ci): debug the ci Baptiste Colle 2025-04-10 16:08:06 +0200
2c2cfc09c5 Update tests.yaml Pauline Bailly-Masson 2025-04-10 15:23:17 +0200
1bd2ad9635 Update tests.yaml Pauline Bailly-Masson 2025-04-10 15:16:14 +0200
76d155e660 wip(ci): rerun ci to debug baptiste 2025-04-10 11:47:40 +0000
8568f910a7 fix llama failing test baptiste 2025-04-10 09:03:49 +0000
781dd203e9 feat(ci): llama3 test working baptiste 2025-04-10 08:32:37 +0000
7779d0c786 feat(ci): llama3 test working baptiste 2025-04-10 08:32:28 +0000
b4917f67e4 wip: able to launch gaudi tests baptiste 2025-04-10 07:52:20 +0000
4e40467c6d wip(test): adding test to ci baptiste 2025-04-10 07:46:59 +0000
3a7619d450

Merge fd88b1d6b9 into 719907410b Jiayu Liu 2025-06-23 15:37:28 +0530
e7500dbbd4

Merge d17f36e497 into 719907410b Emmanuel Ferdman 2025-06-23 15:37:28 +0530
719907410b

[gaudi] Refine rope memory, do not need to keep sin/cos cache per layer (#3274) main Wang, Yi 2025-06-23 17:15:39 +0800
95c7cf9b5c refine rope memory, do not need to keep sin/cos cache per layer Wang, Yi A 2025-06-20 19:00:40 -0700
c86cbf42b7

Merge 2204f91f32 into 238fbd4d50 drbh 2025-06-20 17:58:39 +0200
664e0401f9

Merge c090cd1506 into 238fbd4d50 Frans de Jonge 2025-06-20 16:24:37 +0200
2245fe2fc2

Merge f147f10ed4 into 238fbd4d50 Wang, Yi 2025-06-20 07:14:05 +0800
d811f363d8

Merge 3338b34ba4 into 238fbd4d50 Wang, Yi 2025-06-20 07:13:36 +0800
ad7ef16508

Merge aeabb7b71a into 238fbd4d50 Gerard Casas Saez 2025-06-19 12:21:22 -0400
aaa9353092

Merge c43954d44c into 238fbd4d50 Wang, Yi 2025-06-19 22:42:07 +0800
472bc27371

Merge fab395b41f into 238fbd4d50 Tzu-Yu Lee 2025-06-19 19:39:47 +0800
d4bd5cac79 chore: version 3.3.4 v3.3.4 git_v3.3.4 David Corvoysier 2025-06-19 09:08:38 +0000
238fbd4d50

Neuron backend fix and patch version 3.3.4 (#3273) David Corvoysier 2025-06-19 10:52:41 +0200
14ee6e7804

[gaudi] gemma3 text and vlm model intial support. need to add sliding window support later (#3270) Wang, Yi 2025-06-19 15:32:34 +0800
8aca6b32d8

Merge a2d2406ddd into bd1bdebb47 Guspan Tanadi 2025-06-19 05:37:11 +0200
6e1ca4f619 chore: prepare 3.3.4 David Corvoysier 2025-06-18 19:04:10 +0000
82ebfd67bb fix(neuron): wrong assertion when batch_size==1 David Corvoysier 2025-06-18 18:54:53 +0000
1754b79f10 chore: release 3.2.3 v3.3.3 git_v3.3.3 David Corvoysier 2025-06-18 12:59:29 +0000
bd1bdebb47

doc: fix README (#3271) David Corvoysier 2025-06-18 12:35:36 +0200
f13e28c98d

[gaudi] Refine logging for Gaudi warmup (#3222) regisss 2025-06-18 04:34:00 -0600
d343820b61 doc: fix README David Corvoysier 2025-06-18 09:56:58 +0000
b4d17f18ff

chore: prepare release 3.3.3 (#3269) David Corvoysier 2025-06-18 11:55:26 +0200
93e62e73c8 gemma3 text and vlm model intial support. need to add sliding window support later Wang, Yi A 2025-06-17 17:46:25 -0700
1acc96c82a Black regisss 2025-06-18 07:52:59 +0000
ab81d70000 chore: prepare release 3.3.3 David Corvoysier 2025-06-18 07:38:44 +0000
9dbaa176fd Add log_master & VLM cases regisss 2025-06-17 21:13:13 +0000
0627983c17

[Gaudi] use pad_token_id to pad input id (#3268) Wang, Yi 2025-06-17 15:07:25 +0800
388f27aaa8 remove unused Deepseek and AutoGPTQ pip Wang, Yi A 2025-06-16 22:31:51 -0700
9431a1ec4c [Gaudi] use pad_token_id to pad input id Wang, Yi A 2025-06-16 05:18:01 -0700
564c9e1cc0 Flash causal LM case regisss 2025-06-16 21:07:44 +0000
2ba396c4c1 Merge branch 'main' into add_logs_gaudi_warmup regisss 2025-06-16 12:36:45 +0000
f384e0f308 HuggingFaceM4/Idefics3-8B-Llama3 crash fix Wang, Yi A 2025-06-13 00:35:39 -0700
0381aba864

Merge 551ee3a365 into 3752143b39 drbh 2025-06-15 00:49:23 +0200
aeabb7b71a

disable mamba in CPU Gerard Casas Saez 2025-06-13 12:34:28 -0500
3752143b39

[Gaudi] Fix the integration-test issues (#3265) Yuan Wu 2025-06-13 20:47:06 +0800
7ad4909ce8

Merge branch 'main' into ci Yuan Wu 2025-06-13 20:42:53 +0800
ded4cb52ac

[Gaudi] Enable Qwen3_moe model (#3244) Yuan Wu 2025-06-13 18:03:24 +0800
a220e57f45

[gaudi] HuggingFaceM4/idefics2-8b issue fix (#3264) Wang, Yi 2025-06-13 18:00:08 +0800
8ab1a14e23 Fix test case yuanwu 2025-06-13 07:20:34 +0000
63a75a7952 Remove opt model yuanwu 2025-06-13 05:56:40 +0000
a50b33a964 Fix mistral error yuanwu 2025-06-13 05:43:16 +0000
9ed9497f7e Add UV yuanwu 2025-06-13 05:25:10 +0000
dbec20f98f delete optimum.habana import Wang, Yi A 2025-06-12 22:42:10 -0700
1e56e5fe5c [gaudi] HuggingFaceM4/idefics2-8b issue fix Wang, Yi A 2025-06-12 22:10:13 -0700
1791c855f0

Merge branch 'huggingface:main' into qwen3_moe Yuan Wu 2025-06-13 10:02:18 +0800
0bbd8d1645 Remove useless code yuanwu 2025-06-13 01:48:29 +0000
e07056ab3f

[Gaudi] Remove optimum-habana (#3261) Yuan Wu 2025-06-13 04:35:36 +0800
25fdc5f03c

[gaudi] Move the _update_cos_sin_cache into get_cos_sin (#3254) Yuan Wu 2025-06-13 04:31:11 +0800
613b8dd647

[gaudi] Vlm rebase and issue fix in benchmark test (#3263) Wang, Yi 2025-06-13 04:26:37 +0800
027f293098 fix mllama oom if set batch_size > 8 Wang, Yi A 2025-06-11 23:18:59 -0700
bba260912c fix mllama crash if bs>0 and filter Wang, Yi A 2025-06-11 20:07:48 -0700
b1ae4ad260 fix Qwen2 vl crash in benchmark Wang, Yi A 2025-06-10 23:30:11 -0700
f72b290020 add mark_step in vlm part Wang, Yi A 2025-06-10 19:02:14 -0700
d68edc4a2f Qwen2 vl fix Wang, Yi A 2025-06-09 22:25:47 -0700
93e5e35f9d llama4 and some issue fix Wang, Yi A 2025-06-08 23:56:38 -0700
b09d4cc142 port https://github.com/huggingface/text-generation-inference/pull/3188 to gaudi backend Wang, Yi A 2025-06-08 20:03:28 -0700
d17f36e497

Migrate to V2 Pydantic interface Emmanuel Ferdman 2025-06-11 15:34:12 -0700
839477670a

[gaudi] Perf optimization (#3256) Wang, Yi 2025-06-11 21:00:21 +0800
5f26a72876 Remove the workaround for HPU distributed. yuanwu 2025-06-11 06:15:11 +0000
e202b5f98f Remove mllama.py and llava_next.py yuanwu 2025-06-11 05:37:03 +0000
c112ef1796 Remove useless files yuanwu 2025-06-11 03:27:19 +0000
91c40e6c58 Fix crash yuanwu 2025-06-11 02:34:09 +0000
512eca7f8f Remove debug info yuanwu 2025-06-06 08:26:38 +0000
14112d800b

Merge 2394437dc7 into 79183d1647 Sebastian Liebscher 2025-06-11 09:52:35 +0800
79183d1647

Bump neuron SDK version (#3260) David Corvoysier 2025-06-10 17:56:25 +0200
d5bad17ed6 fix(neuron): adjust test expectations for llama on nxd David Corvoysier 2025-05-26 13:55:20 +0000
2c8b0e37c4 tests(neuron): remove obsolete models David Corvoysier 2025-05-26 13:54:41 +0000
5d2b159182 fix(neuron): adapt entrypoint David Corvoysier 2025-05-26 10:13:33 +0000
07a0e2f7e6 Set default value of ATTENTION as paged yuanwu 2025-06-10 07:42:11 +0000
3e977bde99 feat(neuron): support on-device sampling David Corvoysier 2025-05-23 13:26:05 +0000
bf529ef476 test(neuron): update models and expectations David Corvoysier 2025-05-23 10:13:29 +0000
4e8ffec8ef fix(generator): emulate greedy in sampling parameters David Corvoysier 2025-05-27 09:34:19 +0000
b916076c72 fix(nxd): adapt model retrieval to new APIs David Corvoysier 2025-05-27 12:27:22 +0000
39895019c8 fix(neuron): neuron config is not stored in config anymore David Corvoysier 2025-05-23 09:48:05 +0000
c4dd2a8197 fix(neuron): use new cache import path David Corvoysier 2025-05-23 08:37:17 +0000
83eadbb256 fix(neuron): use neuron_config whenever possible David Corvoysier 2025-05-23 08:33:12 +0000
0b640f7c8c refactor(neuron): remove obsolete code paths David Corvoysier 2025-05-23 08:27:27 +0000
2eb223613e refactor(neuron): use named parameters in inputs helpers David Corvoysier 2025-05-22 14:53:25 +0000
b094f026c1 chore(neuron): bump version to 0.2.0 David Corvoysier 2025-05-22 14:35:18 +0000