text-generation-inference

huggingface/text-generation-inference

Fork 0

mirror of https://github.com/huggingface/text-generation-inference.git synced 2025-09-11 20:34:54 +00:00

Commit Graph

Select branches

Hide Pull Requests

20250708-ci-fixes

add-chat-response-format

add-google-cloud-provider

add-quickstart-script

add-rotary-embed-tests

add-small-ttft-script

add-test-for-warmup-and-kvcache

add_L4

add_api_key

add_batch_dimension

add_chunked_atn

add_chunked_attn

add_deepseekv3

add_gptq_docs

add_integration_test

add_readme_dashboard

add_tunable_prefill

add_vlm_chunking

adding_docs

adjust-mllama-test-output

adjust-where-request-max-tokens-is-defaulted

aiter_kernels

amd-ci-fx

auto_length

automodel-supports-flash-paged-attention

avoid-cuda-graph-during-warmup-if-oom

avoid-zero-seed

backends/trtllm

backends/trtllm-executor

baichuan2-13b

bnb4

bugfix/add_tools_prompt

bugfix/moe-kernels-imports

bugfix/phi-exl2

bump-client-0.6.2

bump-kernel-versions

bump-poetry-and-requirements

chunked_attn_l4

ci-amihalik-update-chat-completion-messages

ci-new-cluster

ci-patch

ci-run-openai-function-calling-compatible-support

ci-update_xpu_image

ci-xpu

ci-xpu2

ci2

ci_amd

ci_amd2

ci_amd3

ci_amd4

close_dl_thread

compat_logger

cuda_ipc_allreduce

debug-gpt2

debug-request-id

debug-torch-23

debug/gemma2

debugging-timeouts

deploy/aml

dev

development-guide

dummy

enable-non-grammar-constrained-tools

enable-qwen2vl-video

enable-transformers-vlm

enable_non_divisible_embeddings

exl2

experiment/moe

explore-static-triton-kernels

explore-t4-gemma-issues

feat-backend-llamacpp

feat/add-load-test

feat/attention_sinks

feat/backend_abstraction

feat/backend_feature

feat/better_tokens

feat/cuda_12

feat/flash_decoding

feat/improve_max_tokens

feat/max_queue_size

feat/page_re_alloc

feat/parse_logs

feat/support_deepspeed

feature/machete

feature/moe-kernels

feature/no_repeat_ngram_size

feature/no_repeat_ngram_size_ci

feature/phi-3-small

feature/prefix

feature/radix-prefix-cache

feature/radix-prefix-cache-bench

feature/vlm-prefix-caching

fix-cudagraph-bug

fix-gemma-tokenization

fix-grammar-cleanup-bug

fix-grammar-fsm-batching

fix-mixtral-adapter-loading

fix-release-tests

fix-repack-for-marlin

fix-tool-call-def

fix-tp

fix-version-install

fix/allow-top-p-0

fix/avoid_record_streams

fix/op-trace-id

fix/parse-mamba-config

fix_default_arg

fix_exl2

fix_fp8_llama3.2

fix_leak

fix_mistral2

fix_neox_rotary_emb

fix_phi3

flashinfer

flashinfer-0.2.5

fp8_kvcache

fp8_rocm

gaudi_llama4_tmp

gha_sccache_use_secrets

git_2.0.4

git_3.1.1

git_3.2.0

git_3.2.1

git_3.3.0

git_3.3.1

git_3.3.2

git_v2.1.0

git_v2.1.1

git_v2.2.0

git_v2.3.0

git_v2.3.1

git_v2.4.0

git_v2.4.1

git_v3.0.0

git_v3.0.1

git_v3.0.2

git_v3.1.0

git_v3.2.2

git_v3.2.3

git_v3.3.3

git_v3.3.4

git_v3.3.5

improve-docs

improve-dynamic-message-content

improve-json-schema-field

improve-tool-call-and-response-ids

improve_defaults

improve_launcher_defaults

inlcude-latest-release-on-commit-builds-tags

ipex-moe

kvrouter

kvrouter-endpoints

llama-fused-compiled-mlp

main

maintenance/docker-network

maintenance/merge-vlm-input-prep

mamba2

martinigoyanes-fix-frequency-penalty

medusa

megatron

message-more-info

mi300-temp

mllama

model_compat_log

more_logs

multi-lora

new_minor_version

nix/cargo-clippy

nix/docker2

nix/pytorch-2.5.1

nix_integration_tests

nix_test2

no_root_user

no_root_user2

op-compilation-benchmarking

origin/slind_window_fix

osanseviero-patch-1

patch_version_3.3.6

pip-installable

pr-1869-ci-run

pr-2076-ci-run

pr-2290-ci-runner

pr-2366-ci-branch

pr-2444-ci-branch

pr-2517-ci-branch

pr-2711-ci-branch

pr-2784-ci-branch

pr-2840-ci-branch

pr-2954-ci-branch

pr-3002-ci-branch

pr-3004-ci-branch

pr-3018-ci-branch

precompile-kernels-workflow

prefix_chunk

prefix_default

proxy_sse_engine_state

quantization

quantization-0.1

refactor-lora-linear

release-3.2.4

remove_post_load_weights

response-header-metrics

revert

rocm-ci-build

rocm_6.2_fixes

router-grammar-compile

s3-cache

self-generating-docs

set-num-blocks

simpler_exllama

skip-mistral-test

speculative

streaming_conceptual

support-granite-vision

support-logit-bias-in-chat

support-phi-model

support-phi3-small

support-pre-compile-kernels

temp_work

test-batch-speedup-amount

test_docs

test_rocm

tmp_invariants

tmp_medusa

tmp_torch_compile

transformers-ci

triton_fix

trtllm-stop-words

trtllm/executor_stats

tuna

update-jsonschema

update_docs2

update_internal_version

update_peft

update_readme

upgrade-outlines

upgrade_mlp_speculator

use_g6

use_updated_kernels

vllm/setup

zstd

#1

#100

#101

#1010

#1018

#1019

#102

#1022

#1023

#1024

#103

#1033

#1034

#1042

#1044

#1045

#1048

#1049

#1052

#1054

#1058

#1059

#106

#1060

#1061

#1063

#1064

#1065

#1066

#1068

#107

#1070

#1071

#1075

#1076

#1077

#108

#1080

#1081

#1089

#109

#1090

#1091

#1092

#1094

#1096

#1097

#1099

#11

#110

#1100

#1101

#1102

#1103

#1105

#1110

#1112

#1116

#1123

#1128

#1134

#114

#1140

#1141

#115

#1153

#1155

#116

#1165

#1165

#117

#1173

#1176

#1178

#1179

#118

#1182

#1183

#1184

#1187

#119

#1198

#1202

#1211

#1214

#1219

#122

#1224

#1228

#123

#1239

#1241

#1242

#1243

#1246

#1252

#126

#1260

#1267

#1270

#1272

#1274

#1276

#1279

#128

#1285

#1287

#129

#1294

#1295

#13

#130

#1301

#1305

#1307

#1308

#1313

#132

#1326

#1328

#133

#1336

#1337

#134

#1341

#1343

#1346

#1347

#1348

#135

#1351

#1352

#1353

#1358

#136

#1361

#1364

#137

#1370

#1373

#138

#1381

#1386

#139

#1390

#1395

#14

#140

#1408

#141

#1414

#1419

#142

#1420

#1424

#1425

#1427

#1428

#143

#1436

#144

#1442

#1448

#145

#1450

#1453

#1454

#1455

#1459

#1461

#1462

#1463

#1469

#147

#1470

#1471

#1473

#1475

#1476

#1477

#1478

#148

#1480

#1484

#1486

#1488

#1489

#149

#1490

#1491

#1492

#1494

#1495

#1496

#1497

#1498

#15

#150

#1502

#1504

#1505

#1506

#151

#1511

#1512

#1514

#1515

#1516

#1517

#1518

#152

#1520

#1523

#1524

#1526

#1527

#153

#1532

#1533

#1537

#1539

#154

#1540

#1541

#1542

#1543

#155

#1550

#1552

#1555

#1556

#1557

#1560

#1563

#1564

#1567

#1568

#1569

#1570

#1571

#1576

#1577

#1578

#1579

#1580

#1583

#1584

#1585

#1586

#1587

#1588

#159

#1591

#1592

#1594

#16

#160

#1603

#1605

#1606

#1607

#1608

#1609

#161

#1610

#1614

#1617

#1618

#1619

#162

#1621

#1626

#1628

#163

#1632

#1637

#1638

#1639

#164

#1646

#1648

#1650

#1651

#1653

#1658

#1660

#1662

#1663

#1664

#1666

#1667

#1668

#167

#1676

#168

#1682

#1685

#1686

#1693

#1697

#1698

#17

#170

#1702

#1703

#1704

#1707

#1708

#1709

#1710

#1713

#1714

#1715

#1716

#1718

#1719

#1726

#1727

#1729

#173

#1730

#1731

#1734

#1735

#1736

#1737

#1739

#174

#1740

#1747

#1748

#1749

#175

#1751

#1755

#1756

#1758

#1759

#1760

#1764

#1765

#1767

#1768

#1773

#1774

#1775

#178

#1784

#1789

#179

#1790

#1791

#1797

#1798

#18

#180

#1800

#1801

#1808

#181

#1811

#1812

#1813

#1815

#1816

#1817

#1818

#1820

#1825

#1827

#1828

#1829

#183

#1830

#1832

#1833

#1835

#1836

#1839

#184

#1840

#1841

#1843

#1844

#1845

#1848

#1849

#185

#1850

#1851

#1854

#1855

#186

#1860

#1865

#1866

#1869

#187

#1882

#1884

#1886

#1888

#1889

#1890

#1892

#1894

#1895

#1898

#19

#190

#1902

#1906

#1908

#1909

#191

#1910

#1912

#1915

#1916

#1917

#1918

#1919

#1920

#1921

#1923

#1924

#1925

#1929

#193

#1931

#1932

#1934

#1935

#1936

#1937

#1938

#1939

#194

#1940

#1942

#1947

#1948

#1949

#1950

#1951

#1953

#1954

#1958

#1959

#196

#1963

#1965

#1967

#1970

#1971

#1975

#1980

#1981

#1985

#1986

#1988

#1989

#1990

#1994

#1995

#1996

#2

#20

#200

#2002

#2003

#2004

#2005

#2006

#2008

#201

#2010

#2011

#2013

#2014

#2015

#2016

#2017

#202

#2020

#2021

#2022

#2023

#2024

#2028

#203

#2031

#2032

#2033

#2034

#2038

#2039

#2044

#2045

#2046

#2047

#2049

#205

#2050

#2052

#2054

#2059

#2060

#2061

#2062

#2063

#2065

#2066

#2068

#207

#2071

#2072

#2074

#2075

#2076

#2078

#2079

#208

#2080

#2084

#2085

#2086

#2087

#2088

#2089

#2091

#2092

#2094

#2095

#2097

#2099

#210

#2101

#2102

#2103

#2104

#2105

#2109

#2110

#2111

#2114

#2115

#2116

#2117

#2118

#2119

#212

#2120

#2123

#2124

#2125

#2127

#2128

#2129

#213

#2131

#2132

#2133

#2134

#2135

#2137

#2138

#214

#2140

#2141

#2142

#2148

#2149

#215

#2151

#2152

#2153

#2155

#2156

#2158

#216

#2161

#2163

#2164

#2165

#2166

#2167

#2168

#2169

#217

#2170

#2173

#2175

#2176

#2178

#2179

#218

#2180

#2181

#2182

#2186

#2187

#2189

#219

#2190

#2191

#2193

#2194

#2196

#22

#220

#2201

#2202

#2203

#2204

#2205

#2208

#2209

#221

#2212

#2213

#2215

#2216

#2217

#222

#2220

#2221

#2224

#2225

#2226

#2228

#2230

#2231

#2232

#2233

#2237

#2242

#2243

#2244

#2245

#2248

#2249

#2250

#2251

#2254

#2255

#2256

#2257

#2258

#2259

#226

#2260

#2261

#2262

#2266

#2268

#2269

#227

#2271

#2272

#2273

#2274

#2276

#2277

#2278

#2279

#228

#2281

#2282

#2283

#2284

#2285

#2286

#2287

#2288

#2289

#2290

#2291

#2292

#2298

#2299

#23

#2300

#2303

#2304

#2306

#2307

#2308

#2309

#2311

#2311

#2313

#2314

#2315

#2317

#2320

#2323

#2325

#2326

#2327

#2328

#2329

#233

#2330

#2331

#2333

#2335

#2336

#2337

#2338

#2339

#234

#2341

#2342

#2343

#2344

#2345

#2346

#2347

#235

#2350

#2351

#2352

#2353

#2354

#2357

#2358

#2359

#2360

#2361

#2364

#2365

#2366

#2367

#2368

#237

#2370

#2371

#2372

#2374

#2377

#2378

#2379

#2381

#2382

#2384

#2385

#2386

#2387

#2389

#2390

#2391

#2392

#2394

#2395

#2396

#2397

#2398

#2399

#24

#2401

#2402

#2403

#2404

#2405

#2406

#2407

#2408

#2410

#2411

#2412

#2414

#2415

#2416

#2419

#242

#2420

#2422

#2423

#2424

#2426

#2427

#2428

#2429

#2430

#2431

#2433

#2437

#2438

#2439

#244

#2442

#2443

#2444

#2449

#2450

#2451

#2453

#2454

#2455

#2459

#246

#2462

#2463

#2468

#2469

#2470

#2471

#2472

#2473

#2477

#2478

#2479

#248

#2481

#2482

#2484

#2486

#2489

#2490

#2491

#2492

#2493

#2494

#2496

#2497

#2498

#2499

#25

#250

#2500

#2501

#2507

#251

#2510

#2511

#2512

#2513

#2514

#2515

#2516

#2517

#2518

#2519

#252

#2520

#2521

#2524

#2525

#2527

#2528

#2529

#2532

#2533

#2535

#2536

#2537

#2538

#2539

#2540

#2545

#2546

#2547

#2548

#255

#2550

#2551

#2552

#2553

#2554

#2555

#2556

#2557

#2558

#2561

#2562

#2563

#2566

#2567

#2568

#257

#2574

#2575

#2577

#2578

#2579

#258

#2580

#2582

#2585

#2586

#2587

#2588

#2589

#259

#2590

#2591

#2592

#2594

#2595

#2596

#2597

#2599

#26

#2600

#2601

#2602

#2603

#2604

#2605

#2606

#2607

#2609

#261

#2610

#2611

#2612

#2614

#2616

#2617

#2619

#262

#2620

#2622

#2623

#2625

#2627

#2628

#2629

#2630

#2631

#2632

#2633

#2634

#2637

#264

#2640

#2642

#2642

#2645

#2646

#2647

#2648

#2650

#2651

#2652

#2655

#2658

#2659

#266

#2661

#2663

#2664

#2665

#2666

#2668

#267

#2673

#2674

#2677

#2678

#2680

#2682

#2683

#2684

#2685

#2686

#2687

#2688

#2689

#269

#2690

#2691

#2692

#2693

#2694

#2695

#2697

#2698

#2699

#27

#2701

#2702

#2704

#2706

#2707

#2708

#2709

#2710

#2711

#2712

#2713

#2714

#2716

#2717

#2718

#2719

#272

#2720

#2721

#2723

#2724

#2725

#2726

#2732

#2733

#2738

#274

#2740

#2741

#2742

#2743

#2745

#2746

#275

#2750

#2751

#2753

#2754

#2755

#2756

#2756

#2758

#276

#2760

#2761

#2762

#2764

#2765

#2766

#2767

#2768

#2769

#277

#2770

#2771

#2772

#2773

#2774

#2777

#2778

#2779

#278

#2782

#2784

#2785

#2786

#2788

#2789

#2790

#2791

#2792

#2793

#2795

#2796

#2797

#2798

#2799

#28

#2802

#2803

#2806

#2807

#2808

#2809

#2810

#2811

#2812

#2814

#2815

#2816

#2817

#2818

#282

#2824

#2825

#2826

#2827

#2829

#2831

#2833

#2834

#2837

#284

#2840

#2841

#2845

#2846

#2848

#2848

#2849

#285

#2850

#2852

#2855

#2856

#2858

#2859

#286

#2861

#2862

#2863

#2866

#2866

#287

#2870

#2874

#2874

#2878

#2882

#2883

#2884

#2885

#2886

#2891

#2892

#2893

#2898

#29

#2901

#2903

#2904

#2905

#2907

#2908

#2910

#2911

#2912

#2913

#2916

#2917

#2918

#2919

#292

#2921

#2922

#2924

#2928

#2929

#2931

#2932

#2933

#2935

#2936

#2937

#2938

#294

#2940

#2941

#2942

#2943

#2945

#2947

#2948

#2949

#2950

#2951

#2953

#2954

#2957

#2958

#2960

#2961

#2962

#2963

#2964

#2964

#2965

#2965

#2966

#2967

#2968

#297

#2970

#2971

#2972

#2975

#2976

#2976

#2977

#2979

#298

#2980

#2981

#2982

#2983

#2988

#299

#2990

#2991

#2992

#2993

#2995

#2996

#2998

#2999

#30

#3000

#3001

#3002

#3003

#3004

#3006

#3007

#3008

#3010

#3012

#3012

#3013

#3015

#3016

#3017

#3018

#302

#3021

#3022

#3024

#3027

#3028

#3029

#303

#3030

#3031

#3032

#3033

#3034

#3034

#3035

#3036

#3037

#3039

#304

#3040

#3041

#3042

#3044

#3045

#3046

#3047

#3048

#3049

#305

#3051

#3053

#3054

#3055

#3056

#3057

#3060

#3061

#3063

#3063

#3064

#3064

#3065

#3065

#3067

#3068

#3069

#3072

#3073

#3074

#3075

#3076

#3077

#3078

#3079

#3079

#308

#3080

#3081

#3083

#3084

#3085

#3086

#3089

#3091

#3092

#3093

#3094

#3095

#3096

#3098

#3099

#31

#310

#3100

#3101

#3103

#3106

#3107

#3108

#3109

#3111

#3111

#3112

#3113

#3116

#3117

#3118

#3120

#3121

#3122

#3124

#3125

#3126

#3127

#3128

#3129

#313

#3131

#3132

#3134

#3136

#3141

#3142

#3144

#3145

#3147

#3148

#3150

#3151

#3152

#3154

#3154

#3156

#3157

#3160

#3161

#3162

#3162

#3164

#3164

#3166

#3166

#3167

#317

#3170

#3171

#3172

#3174

#3176

#3179

#318

#3182

#3184

#3186

#3186

#3187

#3188

#3189

#3193

#3194

#3194

#3196

#3197

#3197

#32

#3200

#3201

#3201

#3202

#3204

#3206

#3207

#3210

#3210

#3211

#3217

#3218

#3220

#3221

#3222

#3223

#3224

#3229

#3230

#3231

#3231

#3232

#3234

#3235

#3236

#3237

#3238

#3239

#3240

#3242

#3244

#3245

#3246

#3249

#325

#3252

#3253

#3254

#3255

#3255

#3256

#3258

#3260

#3261

#3262

#3263

#3264

#3265

#3266

#3266

#3267

#3268

#3269

#327

#3270

#3271

#3273

#3274

#3276

#3276

#328

#3280

#3281

#3282

#3283

#3284

#3286

#3287

#3288

#3288

#329

#3291

#3291

#3292

#3292

#3297

#3298

#33

#3300

#3300

#3302

#3302

#3308

#3310

#3310

#3311

#3312

#3313

#3314

#3315

#3315

#3319

#3322

#3323

#3324

#3325

#3326

#3327

#334

#335

#336

#34

#340

#341

#343

#344

#348

#35

#351

#352

#353

#356

#357

#358

#359

#36

#360

#362

#363

#364

#367

#368

#37

#370

#373

#379

#384

#385

#388

#39

#393

#394

#395

#396

#4

#40

#400

#404

#406

#407

#41

#411

#412

#42

#434

#438

#44

#441

#443

#45

#453

#46

#462

#465

#47

#470

#472

#475

#477

#48

#480

#483

#485

#488

#49

#498

#5

#50

#501

#502

#51

#513

#514

#516

#519

#52

#520

#521

#522

#525

#529

#53

#534

#54

#543

#544

#545

#55

#550

#553

#557

#558

#56

#561

#562

#567

#57

#575

#578

#579

#58

#580

#581

#582

#583

#585

#586

#587

#588

#59

#590

#595

#596

#6

#60

#600

#605

#608

#609

#61

#611

#616

#617

#618

#619

#62

#621

#623

#624

#626

#63

#630

#633

#634

#635

#639

#64

#642

#643

#647

#648

#659

#66

#661

#664

#665

#666

#67

#670

#671

#678

#68

#684

#689

#698

#7

#70

#704

#708

#71

#712

#713

#715

#719

#72

#721

#723

#725

#727

#73

#733

#737

#738

#740

#741

#743

#745

#746

#748

#75

#750

#76

#761

#762

#767

#768

#770

#773

#783

#785

#789

#791

#793

#794

#795

#797

#798

#799

#8

#803

#805

#806

#809

#810

#812

#82

#820

#821

#822

#823

#829

#831

#836

#838

#84

#842

#848

#85

#851

#852

#853

#854

#858

#86

#860

#862

#867

#868

#87

#872

#88

#881

#884

#886

#889

#89

#892

#893

#898

#9

#90

#900

#901

#905

#906

#91

#910

#911

#918

#921

#93

#930

#932

#935

#94

#941

#947

#95

#950

#951

#953

#954

#957

#958

#96

#963

#964

#966

#968

#97

#971

#977

#981

#986

#989

#990

#993

#994

#999

v0.2.0

v0.2.1

v0.3.0

v0.3.1

v0.3.2

v0.4.0

v0.4.1

v0.4.2

v0.4.3

v0.5.0

v0.6.0

v0.7.0

v0.8.0

v0.8.1

v0.8.2

v0.9.0

v0.9.1

v0.9.2

v0.9.3

v0.9.4

v1.0.0

v1.0.1

v1.0.2

v1.0.3

v1.1.0

v1.1.1

v1.2.0

v1.3.0

v1.3.1

v1.3.2

v1.3.3

v1.3.4

v1.4.0

v1.4.1

v1.4.2

v1.4.3

v1.4.4

v1.4.5

v2.0.0

v2.0.1

v2.0.2

v2.0.3

v2.0.4

v2.1.0

v2.1.1

v2.2.0

v2.3.0

v2.3.1

v2.4.0

v2.4.1

v3.0.0

v3.0.1

v3.0.2

v3.1.0

v3.1.1

v3.2.0

v3.2.1

v3.2.2

v3.2.3

v3.3.0

v3.3.1

v3.3.2

v3.3.3

v3.3.4

v3.3.5

e0d168ba20 Use GPTQ-Marlin for supported GPTQ configurations (#2111) Daniël de Kok 2024-07-01 12:59:12 +0200
de96056c26 feat: download lora adapter weights from launcher (#2140) drbh 2024-07-01 06:58:49 -0400
3e02d4fdbf fix: use weights from base_layer (#2141) drbh 2024-07-01 06:58:40 -0400
03691f6d34 Fixing clippy. (#2149) Nicolas Patry 2024-07-01 12:02:19 +0200
8721b601e3 fix microsoft/Phi-3-mini-4k-instruct crash in batch.slots[batch.slot_… (#2148) Wang, Yi 2024-07-01 17:27:53 +0800
69514868ee fix: refactor post_processor logic and add test (#2137) drbh 2024-06-27 17:16:19 -0400
bc15e960ea Fixing gemma2. (#2135) Nicolas Patry 2024-06-27 16:04:20 +0200
befe60b566 Fixing malformed rust tokenizers (#2134) Nicolas Patry 2024-06-27 16:04:03 +0200
d731866245 Idefics2: sync added image tokens with transformers (#2080) Daniël de Kok 2024-06-27 15:54:35 +0200
11fced79bd Bumping to 2.1 (#2131) Nicolas Patry 2024-06-27 12:34:43 +0200
7045598b20 Fixing prom leak by upgrading. (#2129) Nicolas Patry 2024-06-27 08:08:43 +0200
399919d715 fix: simplify kserve endpoint and fix imports (#2119) drbh 2024-06-25 19:30:10 -0400
4700ea413f Add support for Marlin 2:4 sparsity (#2102) Daniël de Kok 2024-06-25 21:09:42 +0200
18a8364d94 Support AWQ quantization with bias (#2117) Daniël de Kok 2024-06-25 21:09:00 +0200
8a155b2d5b Enable multiple LoRa adapters (#2010) drbh 2024-06-25 14:46:27 -0400
8980bf43d7 Fix CI . (#2118) Nicolas Patry 2024-06-25 17:53:36 +0200
136fb7e9b9 Add pytest release marker (#2114) Daniël de Kok 2024-06-25 16:53:20 +0200
27ae4f7916 fix cpu and xpu issue (#2116) Wang, Yi 2024-06-25 22:47:06 +0800
d626685039 Removing IPEX_AVAIL. (#2115) Nicolas Patry 2024-06-25 13:20:57 +0200
1f70bb75e3 feat: add simple tests for weights (#2092) drbh 2024-06-25 06:22:59 -0400
0d879fe66e Cpu tgi (#1936) Wang, Yi 2024-06-25 18:21:29 +0800
a9faabc374 fix ChatCompletion and ChatCompletionChunk object string not compatible with standard openai api (#2089) sunxichen 2024-06-25 16:59:50 +0800
e49aed4713 use xpu-smi to dump used memory (#2047) Wang, Yi 2024-06-25 16:15:46 +0800
1952a0b03b corrected Pydantic warning. (#2095) Jeff 2024-06-25 04:10:32 -0400
76c6a5ca2a Add OTLP Service Name Environment Variable (#2076) KevinDuffy94 2024-06-25 08:33:01 +0100
931ff16c7a Support HF_TOKEN environment variable (#2066) Lucain 2024-06-25 09:23:12 +0200
4b25048b75 Fix cargo-chef prepare (#2101) ur4t 2024-06-25 00:16:36 +0800
a05f3849e4 do not set sliding_window if SUPPORTS_WINDOWING is false Wang, Yi A 2024-09-23 20:48:43 -0700
b6a59e2f91 New runner. Manual squash. (#2110) Nicolas Patry 2024-06-24 18:08:34 +0200
d930724e82 feat: sort cuda graphs in descending order (#2104) drbh 2024-06-21 14:28:26 -0400
f0ed8d294f Fix text-generation-server quantize (#2103) Daniël de Kok 2024-06-21 15:28:51 +0200
c61ef1ce85 Factor out sharding of packed tensors (#2059) Daniël de Kok 2024-06-20 09:56:04 +0200
38741feff0 Support exl2-quantized Qwen2 models (#2085) Daniël de Kok 2024-06-20 07:56:16 +0200
6b2cbd0169 Set maximum grpc message receive size to 2GiB (#2075) Daniël de Kok 2024-06-17 16:40:44 +0200
b3dadbde06 fix build.rs watch files (#2072) Ziru Niu 2024-06-17 18:10:01 +0800
58c743bc90 Contributing guide & Code of Conduct (#2074) Lysandre Debut 2024-06-17 12:09:31 +0200
fb939370a3 Support different image sizes in prefill in VLMs (#2065) Daniël de Kok 2024-06-17 10:49:41 +0200
8ee52e91f4 Adding architecture document (#2044) Alvaro Moran 2024-06-14 15:28:34 +0200
b07a2518d9 Update the link for qwen2 (#2068) Tiezhen WANG 2024-06-14 17:59:33 +0800
f1f28404e7 Add support for GPTQ Marlin (#2052) Daniël de Kok 2024-06-14 09:45:42 +0200
7ce29b1ef2 implement Open Inference Protocol endpoints (#1942) drbh 2024-06-13 12:51:51 -0400
d0a1d50fd3 PR #2049 CI run (#2054) drbh 2024-06-13 11:53:49 -0400
2fdad64ece fix(layers): fix SuRotaryEmbedding (#2060) OlivierDehaene 2024-06-12 18:24:47 +0200
e85e7ac4f9 fix(server): fix OPT implementation (#2061) OlivierDehaene 2024-06-12 18:22:20 +0200
99c947452d Support chat response format (#2046) drbh 2024-06-11 10:44:56 -0400
eb8b76d1d2 Update LLMM1 bound (#2050) fxmarty 2024-06-11 13:30:29 +0200
5381fa7393 fix(ci): remove unnecessary permissions (#2045) Luc Georges 2024-06-10 18:16:53 +0200
ac73317894 feat(ci): add trufflehog secrets detection (#2038) Luc Georges 2024-06-10 17:54:13 +0200
748764efb4 Add Phi-3 medium support (#2039) Daniël de Kok 2024-06-10 09:22:29 +0200
5e035063cf ROCm and sliding windows fixes (#2033) fxmarty 2024-06-10 09:09:50 +0200
93663b4567 server: use chunked inputs Daniël de Kok 2024-05-31 11:51:42 +0000
63cd798a19 Xpu gqa (#2013) Wang, Yi 2024-06-07 01:12:57 +0800
0494677284 Internal runner ? (#2023) Nicolas Patry 2024-06-06 18:51:42 +0200
7aaec2a542 marlin: improve build Daniël de Kok 2024-06-06 11:25:56 +0000
e6d8d2e50f marlin: support tp>1 when group_size==-1 Daniël de Kok 2024-06-06 11:51:52 +0000
77ac0f364b Add support for Marlin-quantized models Daniël de Kok 2024-06-05 08:14:40 +0000
9c75591c11 Revert "Less cache misses on cargo build." Nicolas Patry 2024-06-06 10:33:55 +0200
346f77f8ba Less cache misses on cargo build. Nicolas Patry 2024-06-06 10:33:01 +0200
2c1ff79d38 Update __version__ on __init__.py to 0.7.0 (#2017) Andrés Marafioti 2024-06-05 14:51:07 +0200
af9d60c985 Fix GPTQWeight import (#2020) Daniël de Kok 2024-06-05 14:49:15 +0200
8ee07f0eae Fixing rocm. (#2021) Nicolas Patry 2024-06-05 14:41:34 +0200
20df9234a9 feat: move allocation logic to rust (#1835) OlivierDehaene 2024-06-05 12:18:38 +0200
cdd120ac02 Do not initialize scratch space when there are no ExLlamaV2 layers (#2015) Daniël de Kok 2024-06-05 10:45:47 +0200
353a9669ba Hotfixing make install. (#2008) Nicolas Patry 2024-06-04 23:34:03 +0200
ed8913535b Making make install work better by default. (#2004) Nicolas Patry 2024-06-04 19:38:46 +0200
648dd7b8e1 Support GPTQ models with column-packed up/gate tensor (#2006) Daniël de Kok 2024-06-04 19:37:49 +0200
184c89fd55 feat: add SchedulerV3 (#1996) OlivierDehaene 2024-06-04 15:56:56 +0200
63de9ff020 fix: update triton implementation reference (#2002) Emmanuel Ferdman 2024-06-04 15:26:35 +0300
75aed8aed5 Fix Phi-2 with tp>1 (#2003) Daniël de Kok 2024-06-04 14:26:07 +0200
d51f2c465f router: send the input as chunks to the backend Daniël de Kok 2024-06-03 07:27:22 +0000
347ecdae3b reable xpu, broken by gptq and setuptool upgrade (#1988) Wang, Yi 2024-06-03 22:07:50 +0800
b3b175568f Hotfix GPTQ. Nicolas Patry 2024-06-03 09:32:12 +0000
b30b2a6dae Fixing GPTQ imports. (#1994) Nicolas Patry 2024-06-03 10:36:29 +0200
7752f1050b Fixing Phi3. Nicolas Patry 2024-06-01 08:47:00 +0000
c46a223a6d single char ` addition for docs (#1989) Nicholas Broad 2024-05-31 09:42:14 -0700
d1473fab70 Fixing exl2 scratch buffer. (#1990) Nicolas Patry 2024-05-31 18:01:43 +0200
bdc676f65c Purely refactors paged/attention into layers/attention and make hardware differences more obvious with 1 file per hardware. (#1986) Nicolas Patry 2024-05-31 17:57:01 +0200
dd2d46d9d1 Update documentation version to 2.0.4 (#1980) fxmarty 2024-05-31 07:03:24 -0700
f6c5e078d5 Gemma GPTQ checks: skip logprob checks Daniël de Kok 2024-05-30 07:10:10 +0000
628d6a13da Add support for exl2 quantization Daniël de Kok 2024-05-28 09:51:31 +0000
4dca35fc62 feat: adjust attn weight loading logic (#1975) drbh 2024-05-29 12:42:11 -0400
2b204f0479 Fixing the text part from tokenizer endpoint. (#1967) Nicolas Patry 2024-05-28 16:55:36 +0200
9a1475d816 Fix (non-container) pytest stdout buffering-related lock-up Daniël de Kok 2024-05-28 07:25:14 +0000
cbd5d67101 Upgrade to Axum 0.7 and Hyper 1.0 (Breaking change: disabled ngrok tunneling). (#1959) Nicolas Patry 2024-05-28 14:52:17 +0200
e3d4483f9b fix small typo and broken link (#1958) Moritz Laurer 2024-05-27 17:31:06 +0200
1213b6a817 Processor config chat template (#1954) drbh 2024-05-27 10:03:16 -0400
1439b26cd4 Fix GPTQ for models which do not have float16 at the default dtype (simpler) (#1953) Daniël de Kok 2024-05-27 14:41:28 +0200
742ef9b8e5 Fix (flash) Gemma prefix and enable tests Daniël de Kok 2024-05-24 15:34:42 +0000
479f1953ba Fix seeded output. (#1949) Nicolas Patry 2024-05-24 15:36:13 +0200
92a1e0fbae Aligin the source code with main branch 2.0.4 yuanwu 2024-09-24 03:06:55 +0000
4ac0cd2339

Use Default trait when parameters: null Alvaro Bartolome 2024-09-23 21:23:39 +0200
8ef3da72e1

Fix /vertex payload parsing when MESSAGES_API_ENABLED Alvaro Bartolome 2024-09-23 20:39:20 +0200
a50e90e7e2

added v2 OlivierDehaene 2024-09-23 18:49:37 +0200
6e105c8eb8

wip OlivierDehaene 2024-09-23 18:00:59 +0200
ae2c85f485

Update docs/source/quicktour.md Aritra Roy Gosthipaty 2024-09-23 16:50:34 +0530
f92ff5cdab

Update docs/source/quicktour.md Aritra Roy Gosthipaty 2024-09-23 16:50:27 +0530
ee9f0b56c5

Update docs/source/quicktour.md Aritra Roy Gosthipaty 2024-09-23 15:47:26 +0530
5b78abee4b chore: adding note for private models in quicktour doc ariG23498 2024-09-23 15:17:56 +0530
9263817c71

nix: remove unused _server.nix file (#2538) Daniël de Kok 2024-09-23 09:43:23 +0200
956f02ed40

Update the link to the Ratatui organization Orhun Parmaksız 2024-09-21 10:15:16 +0200