text-generation-inference

huggingface/text-generation-inference

Fork 0

mirror of https://github.com/huggingface/text-generation-inference.git synced 2025-09-11 20:34:54 +00:00

Commit Graph

Select branches

Hide Pull Requests

20250708-ci-fixes

add-chat-response-format

add-google-cloud-provider

add-quickstart-script

add-rotary-embed-tests

add-small-ttft-script

add-test-for-warmup-and-kvcache

add_L4

add_api_key

add_batch_dimension

add_chunked_atn

add_chunked_attn

add_deepseekv3

add_gptq_docs

add_integration_test

add_readme_dashboard

add_tunable_prefill

add_vlm_chunking

adding_docs

adjust-mllama-test-output

adjust-where-request-max-tokens-is-defaulted

aiter_kernels

amd-ci-fx

auto_length

automodel-supports-flash-paged-attention

avoid-cuda-graph-during-warmup-if-oom

avoid-zero-seed

backends/trtllm

backends/trtllm-executor

baichuan2-13b

bnb4

bugfix/add_tools_prompt

bugfix/moe-kernels-imports

bugfix/phi-exl2

bump-client-0.6.2

bump-kernel-versions

bump-poetry-and-requirements

chunked_attn_l4

ci-amihalik-update-chat-completion-messages

ci-new-cluster

ci-patch

ci-run-openai-function-calling-compatible-support

ci-update_xpu_image

ci-xpu

ci-xpu2

ci2

ci_amd

ci_amd2

ci_amd3

ci_amd4

close_dl_thread

compat_logger

cuda_ipc_allreduce

debug-gpt2

debug-request-id

debug-torch-23

debug/gemma2

debugging-timeouts

deploy/aml

dev

development-guide

dummy

enable-non-grammar-constrained-tools

enable-qwen2vl-video

enable-transformers-vlm

enable_non_divisible_embeddings

exl2

experiment/moe

explore-static-triton-kernels

explore-t4-gemma-issues

feat-backend-llamacpp

feat/add-load-test

feat/attention_sinks

feat/backend_abstraction

feat/backend_feature

feat/better_tokens

feat/cuda_12

feat/flash_decoding

feat/improve_max_tokens

feat/max_queue_size

feat/page_re_alloc

feat/parse_logs

feat/support_deepspeed

feature/machete

feature/moe-kernels

feature/no_repeat_ngram_size

feature/no_repeat_ngram_size_ci

feature/phi-3-small

feature/prefix

feature/radix-prefix-cache

feature/radix-prefix-cache-bench

feature/vlm-prefix-caching

fix-cudagraph-bug

fix-gemma-tokenization

fix-grammar-cleanup-bug

fix-grammar-fsm-batching

fix-mixtral-adapter-loading

fix-release-tests

fix-repack-for-marlin

fix-tool-call-def

fix-tp

fix-version-install

fix/allow-top-p-0

fix/avoid_record_streams

fix/op-trace-id

fix/parse-mamba-config

fix_default_arg

fix_exl2

fix_fp8_llama3.2

fix_leak

fix_mistral2

fix_neox_rotary_emb

fix_phi3

flashinfer

flashinfer-0.2.5

fp8_kvcache

fp8_rocm

gaudi_llama4_tmp

gha_sccache_use_secrets

git_2.0.4

git_3.1.1

git_3.2.0

git_3.2.1

git_3.3.0

git_3.3.1

git_3.3.2

git_v2.1.0

git_v2.1.1

git_v2.2.0

git_v2.3.0

git_v2.3.1

git_v2.4.0

git_v2.4.1

git_v3.0.0

git_v3.0.1

git_v3.0.2

git_v3.1.0

git_v3.2.2

git_v3.2.3

git_v3.3.3

git_v3.3.4

git_v3.3.5

improve-docs

improve-dynamic-message-content

improve-json-schema-field

improve-tool-call-and-response-ids

improve_defaults

improve_launcher_defaults

inlcude-latest-release-on-commit-builds-tags

ipex-moe

kvrouter

kvrouter-endpoints

llama-fused-compiled-mlp

main

maintenance/docker-network

maintenance/merge-vlm-input-prep

mamba2

martinigoyanes-fix-frequency-penalty

medusa

megatron

message-more-info

mi300-temp

mllama

model_compat_log

more_logs

multi-lora

new_minor_version

nix/cargo-clippy

nix/docker2

nix/pytorch-2.5.1

nix_integration_tests

nix_test2

no_root_user

no_root_user2

op-compilation-benchmarking

origin/slind_window_fix

osanseviero-patch-1

patch_version_3.3.6

pip-installable

pr-1869-ci-run

pr-2076-ci-run

pr-2290-ci-runner

pr-2366-ci-branch

pr-2444-ci-branch

pr-2517-ci-branch

pr-2711-ci-branch

pr-2784-ci-branch

pr-2840-ci-branch

pr-2954-ci-branch

pr-3002-ci-branch

pr-3004-ci-branch

pr-3018-ci-branch

precompile-kernels-workflow

prefix_chunk

prefix_default

proxy_sse_engine_state

quantization

quantization-0.1

refactor-lora-linear

release-3.2.4

remove_post_load_weights

response-header-metrics

revert

rocm-ci-build

rocm_6.2_fixes

router-grammar-compile

s3-cache

self-generating-docs

set-num-blocks

simpler_exllama

skip-mistral-test

speculative

streaming_conceptual

support-granite-vision

support-logit-bias-in-chat

support-phi-model

support-phi3-small

support-pre-compile-kernels

temp_work

test-batch-speedup-amount

test_docs

test_rocm

tmp_invariants

tmp_medusa

tmp_torch_compile

transformers-ci

triton_fix

trtllm-stop-words

trtllm/executor_stats

tuna

update-jsonschema

update_docs2

update_internal_version

update_peft

update_readme

upgrade-outlines

upgrade_mlp_speculator

use_g6

use_updated_kernels

vllm/setup

zstd

#1

#100

#101

#1010

#1018

#1019

#102

#1022

#1023

#1024

#103

#1033

#1034

#1042

#1044

#1045

#1048

#1049

#1052

#1054

#1058

#1059

#106

#1060

#1061

#1063

#1064

#1065

#1066

#1068

#107

#1070

#1071

#1075

#1076

#1077

#108

#1080

#1081

#1089

#109

#1090

#1091

#1092

#1094

#1096

#1097

#1099

#11

#110

#1100

#1101

#1102

#1103

#1105

#1110

#1112

#1116

#1123

#1128

#1134

#114

#1140

#1141

#115

#1153

#1155

#116

#1165

#1165

#117

#1173

#1176

#1178

#1179

#118

#1182

#1183

#1184

#1187

#119

#1198

#1202

#1211

#1214

#1219

#122

#1224

#1228

#123

#1239

#1241

#1242

#1243

#1246

#1252

#126

#1260

#1267

#1270

#1272

#1274

#1276

#1279

#128

#1285

#1287

#129

#1294

#1295

#13

#130

#1301

#1305

#1307

#1308

#1313

#132

#1326

#1328

#133

#1336

#1337

#134

#1341

#1343

#1346

#1347

#1348

#135

#1351

#1352

#1353

#1358

#136

#1361

#1364

#137

#1370

#1373

#138

#1381

#1386

#139

#1390

#1395

#14

#140

#1408

#141

#1414

#1419

#142

#1420

#1424

#1425

#1427

#1428

#143

#1436

#144

#1442

#1448

#145

#1450

#1453

#1454

#1455

#1459

#1461

#1462

#1463

#1469

#147

#1470

#1471

#1473

#1475

#1476

#1477

#1478

#148

#1480

#1484

#1486

#1488

#1489

#149

#1490

#1491

#1492

#1494

#1495

#1496

#1497

#1498

#15

#150

#1502

#1504

#1505

#1506

#151

#1511

#1512

#1514

#1515

#1516

#1517

#1518

#152

#1520

#1523

#1524

#1526

#1527

#153

#1532

#1533

#1537

#1539

#154

#1540

#1541

#1542

#1543

#155

#1550

#1552

#1555

#1556

#1557

#1560

#1563

#1564

#1567

#1568

#1569

#1570

#1571

#1576

#1577

#1578

#1579

#1580

#1583

#1584

#1585

#1586

#1587

#1588

#159

#1591

#1592

#1594

#16

#160

#1603

#1605

#1606

#1607

#1608

#1609

#161

#1610

#1614

#1617

#1618

#1619

#162

#1621

#1626

#1628

#163

#1632

#1637

#1638

#1639

#164

#1646

#1648

#1650

#1651

#1653

#1658

#1660

#1662

#1663

#1664

#1666

#1667

#1668

#167

#1676

#168

#1682

#1685

#1686

#1693

#1697

#1698

#17

#170

#1702

#1703

#1704

#1707

#1708

#1709

#1710

#1713

#1714

#1715

#1716

#1718

#1719

#1726

#1727

#1729

#173

#1730

#1731

#1734

#1735

#1736

#1737

#1739

#174

#1740

#1747

#1748

#1749

#175

#1751

#1755

#1756

#1758

#1759

#1760

#1764

#1765

#1767

#1768

#1773

#1774

#1775

#178

#1784

#1789

#179

#1790

#1791

#1797

#1798

#18

#180

#1800

#1801

#1808

#181

#1811

#1812

#1813

#1815

#1816

#1817

#1818

#1820

#1825

#1827

#1828

#1829

#183

#1830

#1832

#1833

#1835

#1836

#1839

#184

#1840

#1841

#1843

#1844

#1845

#1848

#1849

#185

#1850

#1851

#1854

#1855

#186

#1860

#1865

#1866

#1869

#187

#1882

#1884

#1886

#1888

#1889

#1890

#1892

#1894

#1895

#1898

#19

#190

#1902

#1906

#1908

#1909

#191

#1910

#1912

#1915

#1916

#1917

#1918

#1919

#1920

#1921

#1923

#1924

#1925

#1929

#193

#1931

#1932

#1934

#1935

#1936

#1937

#1938

#1939

#194

#1940

#1942

#1947

#1948

#1949

#1950

#1951

#1953

#1954

#1958

#1959

#196

#1963

#1965

#1967

#1970

#1971

#1975

#1980

#1981

#1985

#1986

#1988

#1989

#1990

#1994

#1995

#1996

#2

#20

#200

#2002

#2003

#2004

#2005

#2006

#2008

#201

#2010

#2011

#2013

#2014

#2015

#2016

#2017

#202

#2020

#2021

#2022

#2023

#2024

#2028

#203

#2031

#2032

#2033

#2034

#2038

#2039

#2044

#2045

#2046

#2047

#2049

#205

#2050

#2052

#2054

#2059

#2060

#2061

#2062

#2063

#2065

#2066

#2068

#207

#2071

#2072

#2074

#2075

#2076

#2078

#2079

#208

#2080

#2084

#2085

#2086

#2087

#2088

#2089

#2091

#2092

#2094

#2095

#2097

#2099

#210

#2101

#2102

#2103

#2104

#2105

#2109

#2110

#2111

#2114

#2115

#2116

#2117

#2118

#2119

#212

#2120

#2123

#2124

#2125

#2127

#2128

#2129

#213

#2131

#2132

#2133

#2134

#2135

#2137

#2138

#214

#2140

#2141

#2142

#2148

#2149

#215

#2151

#2152

#2153

#2155

#2156

#2158

#216

#2161

#2163

#2164

#2165

#2166

#2167

#2168

#2169

#217

#2170

#2173

#2175

#2176

#2178

#2179

#218

#2180

#2181

#2182

#2186

#2187

#2189

#219

#2190

#2191

#2193

#2194

#2196

#22

#220

#2201

#2202

#2203

#2204

#2205

#2208

#2209

#221

#2212

#2213

#2215

#2216

#2217

#222

#2220

#2221

#2224

#2225

#2226

#2228

#2230

#2231

#2232

#2233

#2237

#2242

#2243

#2244

#2245

#2248

#2249

#2250

#2251

#2254

#2255

#2256

#2257

#2258

#2259

#226

#2260

#2261

#2262

#2266

#2268

#2269

#227

#2271

#2272

#2273

#2274

#2276

#2277

#2278

#2279

#228

#2281

#2282

#2283

#2284

#2285

#2286

#2287

#2288

#2289

#2290

#2291

#2292

#2298

#2299

#23

#2300

#2303

#2304

#2306

#2307

#2308

#2309

#2311

#2311

#2313

#2314

#2315

#2317

#2320

#2323

#2325

#2326

#2327

#2328

#2329

#233

#2330

#2331

#2333

#2335

#2336

#2337

#2338

#2339

#234

#2341

#2342

#2343

#2344

#2345

#2346

#2347

#235

#2350

#2351

#2352

#2353

#2354

#2357

#2358

#2359

#2360

#2361

#2364

#2365

#2366

#2367

#2368

#237

#2370

#2371

#2372

#2374

#2377

#2378

#2379

#2381

#2382

#2384

#2385

#2386

#2387

#2389

#2390

#2391

#2392

#2394

#2395

#2396

#2397

#2398

#2399

#24

#2401

#2402

#2403

#2404

#2405

#2406

#2407

#2408

#2410

#2411

#2412

#2414

#2415

#2416

#2419

#242

#2420

#2422

#2423

#2424

#2426

#2427

#2428

#2429

#2430

#2431

#2433

#2437

#2438

#2439

#244

#2442

#2443

#2444

#2449

#2450

#2451

#2453

#2454

#2455

#2459

#246

#2462

#2463

#2468

#2469

#2470

#2471

#2472

#2473

#2477

#2478

#2479

#248

#2481

#2482

#2484

#2486

#2489

#2490

#2491

#2492

#2493

#2494

#2496

#2497

#2498

#2499

#25

#250

#2500

#2501

#2507

#251

#2510

#2511

#2512

#2513

#2514

#2515

#2516

#2517

#2518

#2519

#252

#2520

#2521

#2524

#2525

#2527

#2528

#2529

#2532

#2533

#2535

#2536

#2537

#2538

#2539

#2540

#2545

#2546

#2547

#2548

#255

#2550

#2551

#2552

#2553

#2554

#2555

#2556

#2557

#2558

#2561

#2562

#2563

#2566

#2567

#2568

#257

#2574

#2575

#2577

#2578

#2579

#258

#2580

#2582

#2585

#2586

#2587

#2588

#2589

#259

#2590

#2591

#2592

#2594

#2595

#2596

#2597

#2599

#26

#2600

#2601

#2602

#2603

#2604

#2605

#2606

#2607

#2609

#261

#2610

#2611

#2612

#2614

#2616

#2617

#2619

#262

#2620

#2622

#2623

#2625

#2627

#2628

#2629

#2630

#2631

#2632

#2633

#2634

#2637

#264

#2640

#2642

#2642

#2645

#2646

#2647

#2648

#2650

#2651

#2652

#2655

#2658

#2659

#266

#2661

#2663

#2664

#2665

#2666

#2668

#267

#2673

#2674

#2677

#2678

#2680

#2682

#2683

#2684

#2685

#2686

#2687

#2688

#2689

#269

#2690

#2691

#2692

#2693

#2694

#2695

#2697

#2698

#2699

#27

#2701

#2702

#2704

#2706

#2707

#2708

#2709

#2710

#2711

#2712

#2713

#2714

#2716

#2717

#2718

#2719

#272

#2720

#2721

#2723

#2724

#2725

#2726

#2732

#2733

#2738

#274

#2740

#2741

#2742

#2743

#2745

#2746

#275

#2750

#2751

#2753

#2754

#2755

#2756

#2756

#2758

#276

#2760

#2761

#2762

#2764

#2765

#2766

#2767

#2768

#2769

#277

#2770

#2771

#2772

#2773

#2774

#2777

#2778

#2779

#278

#2782

#2784

#2785

#2786

#2788

#2789

#2790

#2791

#2792

#2793

#2795

#2796

#2797

#2798

#2799

#28

#2802

#2803

#2806

#2807

#2808

#2809

#2810

#2811

#2812

#2814

#2815

#2816

#2817

#2818

#282

#2824

#2825

#2826

#2827

#2829

#2831

#2833

#2834

#2837

#284

#2840

#2841

#2845

#2846

#2848

#2848

#2849

#285

#2850

#2852

#2855

#2856

#2858

#2859

#286

#2861

#2862

#2863

#2866

#2866

#287

#2870

#2874

#2874

#2878

#2882

#2883

#2884

#2885

#2886

#2891

#2892

#2893

#2898

#29

#2901

#2903

#2904

#2905

#2907

#2908

#2910

#2911

#2912

#2913

#2916

#2917

#2918

#2919

#292

#2921

#2922

#2924

#2928

#2929

#2931

#2932

#2933

#2935

#2936

#2937

#2938

#294

#2940

#2941

#2942

#2943

#2945

#2947

#2948

#2949

#2950

#2951

#2953

#2954

#2957

#2958

#2960

#2961

#2962

#2963

#2964

#2964

#2965

#2965

#2966

#2967

#2968

#297

#2970

#2971

#2972

#2975

#2976

#2976

#2977

#2979

#298

#2980

#2981

#2982

#2983

#2988

#299

#2990

#2991

#2992

#2993

#2995

#2996

#2998

#2999

#30

#3000

#3001

#3002

#3003

#3004

#3006

#3007

#3008

#3010

#3012

#3012

#3013

#3015

#3016

#3017

#3018

#302

#3021

#3022

#3024

#3027

#3028

#3029

#303

#3030

#3031

#3032

#3033

#3034

#3034

#3035

#3036

#3037

#3039

#304

#3040

#3041

#3042

#3044

#3045

#3046

#3047

#3048

#3049

#305

#3051

#3053

#3054

#3055

#3056

#3057

#3060

#3061

#3063

#3063

#3064

#3064

#3065

#3065

#3067

#3068

#3069

#3072

#3073

#3074

#3075

#3076

#3077

#3078

#3079

#3079

#308

#3080

#3081

#3083

#3084

#3085

#3086

#3089

#3091

#3092

#3093

#3094

#3095

#3096

#3098

#3099

#31

#310

#3100

#3101

#3103

#3106

#3107

#3108

#3109

#3111

#3111

#3112

#3113

#3116

#3117

#3118

#3120

#3121

#3122

#3124

#3125

#3126

#3127

#3128

#3129

#313

#3131

#3132

#3134

#3136

#3141

#3142

#3144

#3145

#3147

#3148

#3150

#3151

#3152

#3154

#3154

#3156

#3157

#3160

#3161

#3162

#3162

#3164

#3164

#3166

#3166

#3167

#317

#3170

#3171

#3172

#3174

#3176

#3179

#318

#3182

#3184

#3186

#3186

#3187

#3188

#3189

#3193

#3194

#3194

#3196

#3197

#3197

#32

#3200

#3201

#3201

#3202

#3204

#3206

#3207

#3210

#3210

#3211

#3217

#3218

#3220

#3221

#3222

#3223

#3224

#3229

#3230

#3231

#3231

#3232

#3234

#3235

#3236

#3237

#3238

#3239

#3240

#3242

#3244

#3245

#3246

#3249

#325

#3252

#3253

#3254

#3255

#3255

#3256

#3258

#3260

#3261

#3262

#3263

#3264

#3265

#3266

#3266

#3267

#3268

#3269

#327

#3270

#3271

#3273

#3274

#3276

#3276

#328

#3280

#3281

#3282

#3283

#3284

#3286

#3287

#3288

#3288

#329

#3291

#3291

#3292

#3292

#3297

#3298

#33

#3300

#3300

#3302

#3302

#3308

#3310

#3310

#3311

#3312

#3313

#3314

#3315

#3315

#3319

#3322

#3323

#3324

#3325

#3326

#3327

#334

#335

#336

#34

#340

#341

#343

#344

#348

#35

#351

#352

#353

#356

#357

#358

#359

#36

#360

#362

#363

#364

#367

#368

#37

#370

#373

#379

#384

#385

#388

#39

#393

#394

#395

#396

#4

#40

#400

#404

#406

#407

#41

#411

#412

#42

#434

#438

#44

#441

#443

#45

#453

#46

#462

#465

#47

#470

#472

#475

#477

#48

#480

#483

#485

#488

#49

#498

#5

#50

#501

#502

#51

#513

#514

#516

#519

#52

#520

#521

#522

#525

#529

#53

#534

#54

#543

#544

#545

#55

#550

#553

#557

#558

#56

#561

#562

#567

#57

#575

#578

#579

#58

#580

#581

#582

#583

#585

#586

#587

#588

#59

#590

#595

#596

#6

#60

#600

#605

#608

#609

#61

#611

#616

#617

#618

#619

#62

#621

#623

#624

#626

#63

#630

#633

#634

#635

#639

#64

#642

#643

#647

#648

#659

#66

#661

#664

#665

#666

#67

#670

#671

#678

#68

#684

#689

#698

#7

#70

#704

#708

#71

#712

#713

#715

#719

#72

#721

#723

#725

#727

#73

#733

#737

#738

#740

#741

#743

#745

#746

#748

#75

#750

#76

#761

#762

#767

#768

#770

#773

#783

#785

#789

#791

#793

#794

#795

#797

#798

#799

#8

#803

#805

#806

#809

#810

#812

#82

#820

#821

#822

#823

#829

#831

#836

#838

#84

#842

#848

#85

#851

#852

#853

#854

#858

#86

#860

#862

#867

#868

#87

#872

#88

#881

#884

#886

#889

#89

#892

#893

#898

#9

#90

#900

#901

#905

#906

#91

#910

#911

#918

#921

#93

#930

#932

#935

#94

#941

#947

#95

#950

#951

#953

#954

#957

#958

#96

#963

#964

#966

#968

#97

#971

#977

#981

#986

#989

#990

#993

#994

#999

v0.2.0

v0.2.1

v0.3.0

v0.3.1

v0.3.2

v0.4.0

v0.4.1

v0.4.2

v0.4.3

v0.5.0

v0.6.0

v0.7.0

v0.8.0

v0.8.1

v0.8.2

v0.9.0

v0.9.1

v0.9.2

v0.9.3

v0.9.4

v1.0.0

v1.0.1

v1.0.2

v1.0.3

v1.1.0

v1.1.1

v1.2.0

v1.3.0

v1.3.1

v1.3.2

v1.3.3

v1.3.4

v1.4.0

v1.4.1

v1.4.2

v1.4.3

v1.4.4

v1.4.5

v2.0.0

v2.0.1

v2.0.2

v2.0.3

v2.0.4

v2.1.0

v2.1.1

v2.2.0

v2.3.0

v2.3.1

v2.4.0

v2.4.1

v3.0.0

v3.0.1

v3.0.2

v3.1.0

v3.1.1

v3.2.0

v3.2.1

v3.2.2

v3.2.3

v3.3.0

v3.3.1

v3.3.2

v3.3.3

v3.3.4

v3.3.5

d13215da8f fix(server): fix deepseekv2 loading (#2266) OlivierDehaene 2024-07-21 16:48:04 +0000
85f10ec5c9 feat(fp8): use fbgemm kernels and load fp8 weights directly (#2248) OlivierDehaene 2024-07-20 17:02:04 +0000
50149c3800 Add FP8 release test (#2261) Daniël de Kok 2024-07-20 12:26:06 +0200
c1638a56f1 Add support for Deepseek V2 (#2224) Daniël de Kok 2024-07-19 17:23:20 +0200
898a892082 fix: adjust default tool choice (#2244) drbh 2024-07-19 11:12:02 -0400
8afc17396d add usage stats to toctree (#2260) Erik Kaunismäki 2024-07-19 16:34:04 +0200
66f3de583e usage stats and crash reports (#2220) Erik Kaunismäki 2024-07-19 16:17:56 +0200
e658d95c23 Hotfix: pass through model revision in VlmCausalLM (#2258) Daniël de Kok 2024-07-19 15:59:00 +0200
990ea793c0 Hotfix: fix MPT after recent refactor (#2257) Daniël de Kok 2024-07-19 14:42:35 +0200
ba0dfb6fb1 Hotfix: various GPT-based model fixes (#2256) Daniël de Kok 2024-07-19 14:42:19 +0200
394f8c7d2b Hotfix: fix of use of unquantized weights in Gemma GQA loading (#2255) Daniël de Kok 2024-07-19 12:55:59 +0200
2dd680b799 Improve the handling of quantized weights (#2250) Daniël de Kok 2024-07-19 09:37:39 +0200
118ee57f82 fix(server): fix cohere (#2249) OlivierDehaene 2024-07-18 14:00:13 +0000
e0710ccbeb Remove stray quantize argument in get_weights_col_packed_qkv (#2237) Daniël de Kok 2024-07-16 09:30:57 +0200
7177da0df6 server quantize: expose groupsize option (#2225) Daniël de Kok 2024-07-16 08:36:05 +0200
e955f7b536 Add support for AWQ-quantized Idefics2 (#2233) Daniël de Kok 2024-07-16 07:58:25 +0200
8a223eb6ac fix: Remove bitsandbytes installation when running cpu-only install (#2216) Hugo Larcher 2024-07-15 15:34:20 +0200
271ebb7e20 fix custom cache dir (#2226) Erik Kaunismäki 2024-07-15 15:17:13 +0200
619eeded47 feat: simple mistral lora integration tests (#2180) drbh 2024-07-15 09:16:15 -0400
ee56266044 Use symmetric quantization in the quantize subcommand (#2120) Daniël de Kok 2024-07-12 12:20:12 +0200
dedeb3cfa0 Modifying base in yarn embedding (#2212) SeongBeomLEE 2024-07-12 17:04:51 +0900
5029e7215c fix: append DONE message to chat stream (#2221) drbh 2024-07-11 10:42:58 -0400
85c3c5d64f Add support for FP8 on compute capability >=8.0, <8.9 (#2213) Daniël de Kok 2024-07-11 16:03:26 +0200
2a6c3caf1d Move quantized weight handling out of the Weights class (#2194) Daniël de Kok 2024-07-09 20:04:03 +0200
cc4fceb21d Updating the self check (#2209) Nicolas Patry 2024-07-09 17:23:48 +0200
591f9f70eb Adding sanity check to openapi docs. Nicolas Patry 2024-07-09 11:13:48 +0200
eaaea91e2b Fix nccl regression on PyTorch 2.3 upgrade (#2099) fxmarty 2024-07-08 17:52:10 +0200
48f1196da8 feat: use model name as adapter id in chat endpoints (#2128) drbh 2024-07-08 10:06:49 -0400
74edda9c23 update to metrics 0.23.0 or could work with metrics-exporter-promethe… (#2190) Wang, Yi 2024-07-08 22:03:59 +0800
4a54e41920 fix: python deserialization (#2178) Javier Martinez 2024-07-08 15:59:16 +0200
8dd9b2b135 add doc for intel gpus (#2181) Wang, Yi 2024-07-08 21:57:06 +0800
540e710c3f Falcon/DBRX: get correct number of key-value heads (#2205) Daniël de Kok 2024-07-08 13:22:38 +0200
17594916ed Fix incorrect cache allocation with multi-query (#2203) Daniël de Kok 2024-07-08 11:19:48 +0200
f11fd699b6 hotfix: Fix number of KV heads (#2202) Daniël de Kok 2024-07-08 09:52:12 +0200
8e3d1e6c3f fix dbrx & opt model prefix bug (#2201) icyboy™ 2024-07-08 15:01:14 +0800
508e308088 Consistently take prefix in model constructors (#2191) Daniël de Kok 2024-07-05 16:07:48 +0200
54c194dfa6 GPTQ CI improvements (#2151) Daniël de Kok 2024-07-05 14:12:16 +0200
1e7ce69f20 Fix Starcoder2 after refactor (#2189) Daniël de Kok 2024-07-05 12:22:45 +0200
e481a9bb9b Hotfixing after refactor. Nicolas Patry 2024-07-05 09:25:29 +0000
1b434e8019 Refactor dead code - Removing all flash_xxx.py files. (#2166) Nicolas Patry 2024-07-05 10:29:56 +0200
7efcb5e0ed

remove LORA_ADAPTERS_PATH (#2563) Nicholas Broad 2024-09-24 16:20:15 -0700
11782d367d

remove LORA_ADAPTERS_PATH Nicholas Broad 2024-09-24 15:29:54 -0700
dd8691b7c5

More tensor cores. (#2558) Nicolas Patry 2024-09-24 23:57:26 +0200
c032280b17

Cleanup Vertex + Chat (#2553) Nicolas Patry 2024-09-24 23:37:17 +0200
bb8c38f5fe

Gemma is modified by this. Nicolas Patry 2024-09-24 22:51:45 +0200
d77a31cd95

Fixing the logic. Nicolas Patry 2024-09-24 14:42:01 +0200
56c630a425

More tensor cores. Nicolas Patry 2024-09-24 13:51:36 +0200
75c8c54ac9

Hotfixing main. (#2562) Nicolas Patry 2024-09-24 23:00:43 +0200
e2c92a0a07

Hotfixing main. Nicolas Patry 2024-09-24 22:59:28 +0200
c6231ac4c7 feat: enable pytorch xpu support for non-attention models Dmitry Rogozhkin 2024-09-19 16:47:55 -0700
ebe33e7dbc

Fixing the pre-commit after rebase. Nicolas Patry 2024-09-24 22:24:44 +0200
02b25e524d

Update Cargo lock. Nicolas Patry 2024-09-24 22:14:36 +0200
e4397991d2

Revert everything. Nicolas Patry 2024-09-24 20:23:02 +0200
6a07d1e83c

Trying smething. Nicolas Patry 2024-09-24 20:18:28 +0200
48b7841a68

Trying some other install. Nicolas Patry 2024-09-24 20:14:10 +0200
0b029b3c24

Dummy change. Nicolas Patry 2024-09-24 20:12:12 +0200
6c6f2b5575

Wat? Nicolas Patry 2024-09-24 20:10:29 +0200
7d219fc2bd

Updating Cargo ? Nicolas Patry 2024-09-24 20:05:29 +0200
4a29ae2b66

Not unstable. Nicolas Patry 2024-09-24 20:01:55 +0200
846fcc3447

Let's debug that. Nicolas Patry 2024-09-24 17:41:34 +0200
259ba29a90

Fixup doc. Nicolas Patry 2024-09-24 17:35:42 +0200
d46d3c65ea

Changing back this logprobs default. Nicolas Patry 2024-09-24 17:21:07 +0200
6744df5873

Fix docs. Nicolas Patry 2024-09-24 11:38:59 +0200
be00fb7fc0

Parameters are optional Nicolas Patry 2024-09-24 11:37:58 +0200
507ecae147

logprobs defaults to false. Nicolas Patry 2024-09-23 22:21:43 +0200
7cc18e85b5

Cleanup Vertex + Chat Nicolas Patry 2024-09-23 22:00:59 +0200
e6d29656b5

Adding note for private models in quick-tour document (#2548) Aritra Roy Gosthipaty 2024-09-24 18:36:53 +0530
8024ded58f

Simplify crossterm imports (#2545) Orhun Parmaksız 2024-09-24 15:57:20 +0300
03263f5e88

Update the link to the Ratatui organization (#2546) Orhun Parmaksız 2024-09-24 15:51:48 +0300
3f14cd1420

Add DenseMoELayer and wire it up in Mixtral/Deepseek V2 (#2537) Daniël de Kok 2024-09-24 14:27:06 +0200
c29dc89c18

Add support for scalar FP8 weight scales (#2550) Daniël de Kok 2024-09-24 13:57:40 +0200
afe3fed1a4 Merge branch 'fix_rocm_fa' into rocm_6.2_fixes tuna rocm_6.2_fixes Mohit Sharma 2024-09-24 10:53:50 +0000
64e981fdcf fix issue for sliding window models Mohit Sharma 2024-09-24 10:53:19 +0000
0ff6ff60ad

Hotfixing main (#2556) Nicolas Patry 2024-09-24 11:51:14 +0200
94c1b56c44

Hotfixing main Nicolas Patry 2024-09-24 11:26:19 +0200
74d3ce106e

Micro cleanup. (#2555) Nicolas Patry 2024-09-24 11:19:24 +0200
67f5051e3d Remove stray debug print Daniël de Kok 2024-09-24 08:31:02 +0000
7c05b0ba54 Support LLM compressor FP8 checkpoints on H100 Daniël de Kok 2024-09-24 08:27:52 +0000
d31a6f75cc

Remove duplicated RUN in Dockerfile (#2547) Alvaro Bartolome 2024-09-24 10:19:13 +0200
c68f72e790

Micro cleanup. Nicolas Patry 2024-09-24 09:54:31 +0200
ccaf9ff507 Add support for scalar FP8 weight scales Daniël de Kok 2024-09-23 15:46:41 +0000
10e6f29295

chore: Add old V2 backend (#2551) OlivierDehaene 2024-09-24 08:38:17 +0200
835ad0a923 Adding "longrope" for Phi-3 (#2172) (#2179) Aaron Mihalik 2024-07-05 03:46:41 -0400
2e09ebecf6 Preparing patch release. (#2186) Nicolas Patry 2024-07-04 10:55:33 +0200
74ddd1265a Version 2.1.1 Nicolas Patry 2024-07-04 12:39:07 +0200
e93c830e66 Fixing missing object field for regular completions. (#2175) Nicolas Patry 2024-07-03 12:56:27 +0200
64989f9439 Fixing the dockerfile warnings. (#2173) Nicolas Patry 2024-07-03 12:48:45 +0200
878491cd5b Revert "Fixing missing object field for regular completions." Nicolas Patry 2024-07-03 10:41:39 +0000
b6c8984658 Fixing missing object field for regular completions. Nicolas Patry 2024-07-03 10:40:22 +0000
233e46409a feat: improve update_docs for openapi schema (#2169) drbh 2024-07-03 03:53:35 -0400
d580215a24 Hotfixing qwen2 and starcoder2 (which also get clamping). (#2167) Nicolas Patry 2024-07-02 14:26:47 +0200
bc5a792dc8 Fixing rocm. (#2164) Nicolas Patry 2024-07-02 12:01:08 +0200
e913f3ad2d fix: use the base layers weight in mistral rocm (#2155) drbh 2024-07-02 05:56:25 -0400
71b0189cd5 fix FlashDecoding change's regression in intel platform (#2161) Wang, Yi 2024-07-02 17:56:07 +0800
9b3d3a3690 Fixing graph capture for flash decoding. (#2163) Nicolas Patry 2024-07-02 11:43:07 +0200
b80bd724e1 Move to FlashDecoding instead of PagedAttention kernel. (#1940) Nicolas Patry 2024-07-01 23:28:00 +0200
2b9339c65b Fixing baichuan override. (#2158) Nicolas Patry 2024-07-01 23:25:54 +0200
381c5c02a6 fix: prefer serde structs over custom functions (#2127) drbh 2024-07-01 09:08:05 -0400
6265956bc4 refine get xpu free memory/enable Qwen2/gemma2/gemma/phi in intel platform (#2132) Wang, Yi 2024-07-01 20:32:54 +0800
5b977c3141 fix AttributeError: 'MixtralLayer' object has no attribute 'mlp' (#2123) icyboy™ 2024-07-01 20:17:22 +0800