text-generation-inference

huggingface/text-generation-inference

Fork 0

mirror of https://github.com/huggingface/text-generation-inference.git synced 2025-09-11 20:34:54 +00:00

Commit Graph

Select branches

Hide Pull Requests

20250708-ci-fixes

add-chat-response-format

add-google-cloud-provider

add-quickstart-script

add-rotary-embed-tests

add-small-ttft-script

add-test-for-warmup-and-kvcache

add_L4

add_api_key

add_batch_dimension

add_chunked_atn

add_chunked_attn

add_deepseekv3

add_gptq_docs

add_integration_test

add_readme_dashboard

add_tunable_prefill

add_vlm_chunking

adding_docs

adjust-mllama-test-output

adjust-where-request-max-tokens-is-defaulted

aiter_kernels

amd-ci-fx

auto_length

automodel-supports-flash-paged-attention

avoid-cuda-graph-during-warmup-if-oom

avoid-zero-seed

backends/trtllm

backends/trtllm-executor

baichuan2-13b

bnb4

bugfix/add_tools_prompt

bugfix/moe-kernels-imports

bugfix/phi-exl2

bump-client-0.6.2

bump-kernel-versions

bump-poetry-and-requirements

chunked_attn_l4

ci-amihalik-update-chat-completion-messages

ci-new-cluster

ci-patch

ci-run-openai-function-calling-compatible-support

ci-update_xpu_image

ci-xpu

ci-xpu2

ci2

ci_amd

ci_amd2

ci_amd3

ci_amd4

close_dl_thread

compat_logger

cuda_ipc_allreduce

debug-gpt2

debug-request-id

debug-torch-23

debug/gemma2

debugging-timeouts

deploy/aml

dev

development-guide

dummy

enable-non-grammar-constrained-tools

enable-qwen2vl-video

enable-transformers-vlm

enable_non_divisible_embeddings

exl2

experiment/moe

explore-static-triton-kernels

explore-t4-gemma-issues

feat-backend-llamacpp

feat/add-load-test

feat/attention_sinks

feat/backend_abstraction

feat/backend_feature

feat/better_tokens

feat/cuda_12

feat/flash_decoding

feat/improve_max_tokens

feat/max_queue_size

feat/page_re_alloc

feat/parse_logs

feat/support_deepspeed

feature/machete

feature/moe-kernels

feature/no_repeat_ngram_size

feature/no_repeat_ngram_size_ci

feature/phi-3-small

feature/prefix

feature/radix-prefix-cache

feature/radix-prefix-cache-bench

feature/vlm-prefix-caching

fix-cudagraph-bug

fix-gemma-tokenization

fix-grammar-cleanup-bug

fix-grammar-fsm-batching

fix-mixtral-adapter-loading

fix-release-tests

fix-repack-for-marlin

fix-tool-call-def

fix-tp

fix-version-install

fix/allow-top-p-0

fix/avoid_record_streams

fix/op-trace-id

fix/parse-mamba-config

fix_default_arg

fix_exl2

fix_fp8_llama3.2

fix_leak

fix_mistral2

fix_neox_rotary_emb

fix_phi3

flashinfer

flashinfer-0.2.5

fp8_kvcache

fp8_rocm

gaudi_llama4_tmp

gha_sccache_use_secrets

git_2.0.4

git_3.1.1

git_3.2.0

git_3.2.1

git_3.3.0

git_3.3.1

git_3.3.2

git_v2.1.0

git_v2.1.1

git_v2.2.0

git_v2.3.0

git_v2.3.1

git_v2.4.0

git_v2.4.1

git_v3.0.0

git_v3.0.1

git_v3.0.2

git_v3.1.0

git_v3.2.2

git_v3.2.3

git_v3.3.3

git_v3.3.4

git_v3.3.5

improve-docs

improve-dynamic-message-content

improve-json-schema-field

improve-tool-call-and-response-ids

improve_defaults

improve_launcher_defaults

inlcude-latest-release-on-commit-builds-tags

ipex-moe

kvrouter

kvrouter-endpoints

llama-fused-compiled-mlp

main

maintenance/docker-network

maintenance/merge-vlm-input-prep

mamba2

martinigoyanes-fix-frequency-penalty

medusa

megatron

message-more-info

mi300-temp

mllama

model_compat_log

more_logs

multi-lora

new_minor_version

nix/cargo-clippy

nix/docker2

nix/pytorch-2.5.1

nix_integration_tests

nix_test2

no_root_user

no_root_user2

op-compilation-benchmarking

origin/slind_window_fix

osanseviero-patch-1

patch_version_3.3.6

pip-installable

pr-1869-ci-run

pr-2076-ci-run

pr-2290-ci-runner

pr-2366-ci-branch

pr-2444-ci-branch

pr-2517-ci-branch

pr-2711-ci-branch

pr-2784-ci-branch

pr-2840-ci-branch

pr-2954-ci-branch

pr-3002-ci-branch

pr-3004-ci-branch

pr-3018-ci-branch

precompile-kernels-workflow

prefix_chunk

prefix_default

proxy_sse_engine_state

quantization

quantization-0.1

refactor-lora-linear

release-3.2.4

remove_post_load_weights

response-header-metrics

revert

rocm-ci-build

rocm_6.2_fixes

router-grammar-compile

s3-cache

self-generating-docs

set-num-blocks

simpler_exllama

skip-mistral-test

speculative

streaming_conceptual

support-granite-vision

support-logit-bias-in-chat

support-phi-model

support-phi3-small

support-pre-compile-kernels

temp_work

test-batch-speedup-amount

test_docs

test_rocm

tmp_invariants

tmp_medusa

tmp_torch_compile

transformers-ci

triton_fix

trtllm-stop-words

trtllm/executor_stats

tuna

update-jsonschema

update_docs2

update_internal_version

update_peft

update_readme

upgrade-outlines

upgrade_mlp_speculator

use_g6

use_updated_kernels

vllm/setup

zstd

#1

#100

#101

#1010

#1018

#1019

#102

#1022

#1023

#1024

#103

#1033

#1034

#1042

#1044

#1045

#1048

#1049

#1052

#1054

#1058

#1059

#106

#1060

#1061

#1063

#1064

#1065

#1066

#1068

#107

#1070

#1071

#1075

#1076

#1077

#108

#1080

#1081

#1089

#109

#1090

#1091

#1092

#1094

#1096

#1097

#1099

#11

#110

#1100

#1101

#1102

#1103

#1105

#1110

#1112

#1116

#1123

#1128

#1134

#114

#1140

#1141

#115

#1153

#1155

#116

#1165

#1165

#117

#1173

#1176

#1178

#1179

#118

#1182

#1183

#1184

#1187

#119

#1198

#1202

#1211

#1214

#1219

#122

#1224

#1228

#123

#1239

#1241

#1242

#1243

#1246

#1252

#126

#1260

#1267

#1270

#1272

#1274

#1276

#1279

#128

#1285

#1287

#129

#1294

#1295

#13

#130

#1301

#1305

#1307

#1308

#1313

#132

#1326

#1328

#133

#1336

#1337

#134

#1341

#1343

#1346

#1347

#1348

#135

#1351

#1352

#1353

#1358

#136

#1361

#1364

#137

#1370

#1373

#138

#1381

#1386

#139

#1390

#1395

#14

#140

#1408

#141

#1414

#1419

#142

#1420

#1424

#1425

#1427

#1428

#143

#1436

#144

#1442

#1448

#145

#1450

#1453

#1454

#1455

#1459

#1461

#1462

#1463

#1469

#147

#1470

#1471

#1473

#1475

#1476

#1477

#1478

#148

#1480

#1484

#1486

#1488

#1489

#149

#1490

#1491

#1492

#1494

#1495

#1496

#1497

#1498

#15

#150

#1502

#1504

#1505

#1506

#151

#1511

#1512

#1514

#1515

#1516

#1517

#1518

#152

#1520

#1523

#1524

#1526

#1527

#153

#1532

#1533

#1537

#1539

#154

#1540

#1541

#1542

#1543

#155

#1550

#1552

#1555

#1556

#1557

#1560

#1563

#1564

#1567

#1568

#1569

#1570

#1571

#1576

#1577

#1578

#1579

#1580

#1583

#1584

#1585

#1586

#1587

#1588

#159

#1591

#1592

#1594

#16

#160

#1603

#1605

#1606

#1607

#1608

#1609

#161

#1610

#1614

#1617

#1618

#1619

#162

#1621

#1626

#1628

#163

#1632

#1637

#1638

#1639

#164

#1646

#1648

#1650

#1651

#1653

#1658

#1660

#1662

#1663

#1664

#1666

#1667

#1668

#167

#1676

#168

#1682

#1685

#1686

#1693

#1697

#1698

#17

#170

#1702

#1703

#1704

#1707

#1708

#1709

#1710

#1713

#1714

#1715

#1716

#1718

#1719

#1726

#1727

#1729

#173

#1730

#1731

#1734

#1735

#1736

#1737

#1739

#174

#1740

#1747

#1748

#1749

#175

#1751

#1755

#1756

#1758

#1759

#1760

#1764

#1765

#1767

#1768

#1773

#1774

#1775

#178

#1784

#1789

#179

#1790

#1791

#1797

#1798

#18

#180

#1800

#1801

#1808

#181

#1811

#1812

#1813

#1815

#1816

#1817

#1818

#1820

#1825

#1827

#1828

#1829

#183

#1830

#1832

#1833

#1835

#1836

#1839

#184

#1840

#1841

#1843

#1844

#1845

#1848

#1849

#185

#1850

#1851

#1854

#1855

#186

#1860

#1865

#1866

#1869

#187

#1882

#1884

#1886

#1888

#1889

#1890

#1892

#1894

#1895

#1898

#19

#190

#1902

#1906

#1908

#1909

#191

#1910

#1912

#1915

#1916

#1917

#1918

#1919

#1920

#1921

#1923

#1924

#1925

#1929

#193

#1931

#1932

#1934

#1935

#1936

#1937

#1938

#1939

#194

#1940

#1942

#1947

#1948

#1949

#1950

#1951

#1953

#1954

#1958

#1959

#196

#1963

#1965

#1967

#1970

#1971

#1975

#1980

#1981

#1985

#1986

#1988

#1989

#1990

#1994

#1995

#1996

#2

#20

#200

#2002

#2003

#2004

#2005

#2006

#2008

#201

#2010

#2011

#2013

#2014

#2015

#2016

#2017

#202

#2020

#2021

#2022

#2023

#2024

#2028

#203

#2031

#2032

#2033

#2034

#2038

#2039

#2044

#2045

#2046

#2047

#2049

#205

#2050

#2052

#2054

#2059

#2060

#2061

#2062

#2063

#2065

#2066

#2068

#207

#2071

#2072

#2074

#2075

#2076

#2078

#2079

#208

#2080

#2084

#2085

#2086

#2087

#2088

#2089

#2091

#2092

#2094

#2095

#2097

#2099

#210

#2101

#2102

#2103

#2104

#2105

#2109

#2110

#2111

#2114

#2115

#2116

#2117

#2118

#2119

#212

#2120

#2123

#2124

#2125

#2127

#2128

#2129

#213

#2131

#2132

#2133

#2134

#2135

#2137

#2138

#214

#2140

#2141

#2142

#2148

#2149

#215

#2151

#2152

#2153

#2155

#2156

#2158

#216

#2161

#2163

#2164

#2165

#2166

#2167

#2168

#2169

#217

#2170

#2173

#2175

#2176

#2178

#2179

#218

#2180

#2181

#2182

#2186

#2187

#2189

#219

#2190

#2191

#2193

#2194

#2196

#22

#220

#2201

#2202

#2203

#2204

#2205

#2208

#2209

#221

#2212

#2213

#2215

#2216

#2217

#222

#2220

#2221

#2224

#2225

#2226

#2228

#2230

#2231

#2232

#2233

#2237

#2242

#2243

#2244

#2245

#2248

#2249

#2250

#2251

#2254

#2255

#2256

#2257

#2258

#2259

#226

#2260

#2261

#2262

#2266

#2268

#2269

#227

#2271

#2272

#2273

#2274

#2276

#2277

#2278

#2279

#228

#2281

#2282

#2283

#2284

#2285

#2286

#2287

#2288

#2289

#2290

#2291

#2292

#2298

#2299

#23

#2300

#2303

#2304

#2306

#2307

#2308

#2309

#2311

#2311

#2313

#2314

#2315

#2317

#2320

#2323

#2325

#2326

#2327

#2328

#2329

#233

#2330

#2331

#2333

#2335

#2336

#2337

#2338

#2339

#234

#2341

#2342

#2343

#2344

#2345

#2346

#2347

#235

#2350

#2351

#2352

#2353

#2354

#2357

#2358

#2359

#2360

#2361

#2364

#2365

#2366

#2367

#2368

#237

#2370

#2371

#2372

#2374

#2377

#2378

#2379

#2381

#2382

#2384

#2385

#2386

#2387

#2389

#2390

#2391

#2392

#2394

#2395

#2396

#2397

#2398

#2399

#24

#2401

#2402

#2403

#2404

#2405

#2406

#2407

#2408

#2410

#2411

#2412

#2414

#2415

#2416

#2419

#242

#2420

#2422

#2423

#2424

#2426

#2427

#2428

#2429

#2430

#2431

#2433

#2437

#2438

#2439

#244

#2442

#2443

#2444

#2449

#2450

#2451

#2453

#2454

#2455

#2459

#246

#2462

#2463

#2468

#2469

#2470

#2471

#2472

#2473

#2477

#2478

#2479

#248

#2481

#2482

#2484

#2486

#2489

#2490

#2491

#2492

#2493

#2494

#2496

#2497

#2498

#2499

#25

#250

#2500

#2501

#2507

#251

#2510

#2511

#2512

#2513

#2514

#2515

#2516

#2517

#2518

#2519

#252

#2520

#2521

#2524

#2525

#2527

#2528

#2529

#2532

#2533

#2535

#2536

#2537

#2538

#2539

#2540

#2545

#2546

#2547

#2548

#255

#2550

#2551

#2552

#2553

#2554

#2555

#2556

#2557

#2558

#2561

#2562

#2563

#2566

#2567

#2568

#257

#2574

#2575

#2577

#2578

#2579

#258

#2580

#2582

#2585

#2586

#2587

#2588

#2589

#259

#2590

#2591

#2592

#2594

#2595

#2596

#2597

#2599

#26

#2600

#2601

#2602

#2603

#2604

#2605

#2606

#2607

#2609

#261

#2610

#2611

#2612

#2614

#2616

#2617

#2619

#262

#2620

#2622

#2623

#2625

#2627

#2628

#2629

#2630

#2631

#2632

#2633

#2634

#2637

#264

#2640

#2642

#2642

#2645

#2646

#2647

#2648

#2650

#2651

#2652

#2655

#2658

#2659

#266

#2661

#2663

#2664

#2665

#2666

#2668

#267

#2673

#2674

#2677

#2678

#2680

#2682

#2683

#2684

#2685

#2686

#2687

#2688

#2689

#269

#2690

#2691

#2692

#2693

#2694

#2695

#2697

#2698

#2699

#27

#2701

#2702

#2704

#2706

#2707

#2708

#2709

#2710

#2711

#2712

#2713

#2714

#2716

#2717

#2718

#2719

#272

#2720

#2721

#2723

#2724

#2725

#2726

#2732

#2733

#2738

#274

#2740

#2741

#2742

#2743

#2745

#2746

#275

#2750

#2751

#2753

#2754

#2755

#2756

#2756

#2758

#276

#2760

#2761

#2762

#2764

#2765

#2766

#2767

#2768

#2769

#277

#2770

#2771

#2772

#2773

#2774

#2777

#2778

#2779

#278

#2782

#2784

#2785

#2786

#2788

#2789

#2790

#2791

#2792

#2793

#2795

#2796

#2797

#2798

#2799

#28

#2802

#2803

#2806

#2807

#2808

#2809

#2810

#2811

#2812

#2814

#2815

#2816

#2817

#2818

#282

#2824

#2825

#2826

#2827

#2829

#2831

#2833

#2834

#2837

#284

#2840

#2841

#2845

#2846

#2848

#2848

#2849

#285

#2850

#2852

#2855

#2856

#2858

#2859

#286

#2861

#2862

#2863

#2866

#2866

#287

#2870

#2874

#2874

#2878

#2882

#2883

#2884

#2885

#2886

#2891

#2892

#2893

#2898

#29

#2901

#2903

#2904

#2905

#2907

#2908

#2910

#2911

#2912

#2913

#2916

#2917

#2918

#2919

#292

#2921

#2922

#2924

#2928

#2929

#2931

#2932

#2933

#2935

#2936

#2937

#2938

#294

#2940

#2941

#2942

#2943

#2945

#2947

#2948

#2949

#2950

#2951

#2953

#2954

#2957

#2958

#2960

#2961

#2962

#2963

#2964

#2964

#2965

#2965

#2966

#2967

#2968

#297

#2970

#2971

#2972

#2975

#2976

#2976

#2977

#2979

#298

#2980

#2981

#2982

#2983

#2988

#299

#2990

#2991

#2992

#2993

#2995

#2996

#2998

#2999

#30

#3000

#3001

#3002

#3003

#3004

#3006

#3007

#3008

#3010

#3012

#3012

#3013

#3015

#3016

#3017

#3018

#302

#3021

#3022

#3024

#3027

#3028

#3029

#303

#3030

#3031

#3032

#3033

#3034

#3034

#3035

#3036

#3037

#3039

#304

#3040

#3041

#3042

#3044

#3045

#3046

#3047

#3048

#3049

#305

#3051

#3053

#3054

#3055

#3056

#3057

#3060

#3061

#3063

#3063

#3064

#3064

#3065

#3065

#3067

#3068

#3069

#3072

#3073

#3074

#3075

#3076

#3077

#3078

#3079

#3079

#308

#3080

#3081

#3083

#3084

#3085

#3086

#3089

#3091

#3092

#3093

#3094

#3095

#3096

#3098

#3099

#31

#310

#3100

#3101

#3103

#3106

#3107

#3108

#3109

#3111

#3111

#3112

#3113

#3116

#3117

#3118

#3120

#3121

#3122

#3124

#3125

#3126

#3127

#3128

#3129

#313

#3131

#3132

#3134

#3136

#3141

#3142

#3144

#3145

#3147

#3148

#3150

#3151

#3152

#3154

#3154

#3156

#3157

#3160

#3161

#3162

#3162

#3164

#3164

#3166

#3166

#3167

#317

#3170

#3171

#3172

#3174

#3176

#3179

#318

#3182

#3184

#3186

#3186

#3187

#3188

#3189

#3193

#3194

#3194

#3196

#3197

#3197

#32

#3200

#3201

#3201

#3202

#3204

#3206

#3207

#3210

#3210

#3211

#3217

#3218

#3220

#3221

#3222

#3223

#3224

#3229

#3230

#3231

#3231

#3232

#3234

#3235

#3236

#3237

#3238

#3239

#3240

#3242

#3244

#3245

#3246

#3249

#325

#3252

#3253

#3254

#3255

#3255

#3256

#3258

#3260

#3261

#3262

#3263

#3264

#3265

#3266

#3266

#3267

#3268

#3269

#327

#3270

#3271

#3273

#3274

#3276

#3276

#328

#3280

#3281

#3282

#3283

#3284

#3286

#3287

#3288

#3288

#329

#3291

#3291

#3292

#3292

#3297

#3298

#33

#3300

#3300

#3302

#3302

#3308

#3310

#3310

#3311

#3312

#3313

#3314

#3315

#3315

#3319

#3322

#3323

#3324

#3325

#3326

#3327

#334

#335

#336

#34

#340

#341

#343

#344

#348

#35

#351

#352

#353

#356

#357

#358

#359

#36

#360

#362

#363

#364

#367

#368

#37

#370

#373

#379

#384

#385

#388

#39

#393

#394

#395

#396

#4

#40

#400

#404

#406

#407

#41

#411

#412

#42

#434

#438

#44

#441

#443

#45

#453

#46

#462

#465

#47

#470

#472

#475

#477

#48

#480

#483

#485

#488

#49

#498

#5

#50

#501

#502

#51

#513

#514

#516

#519

#52

#520

#521

#522

#525

#529

#53

#534

#54

#543

#544

#545

#55

#550

#553

#557

#558

#56

#561

#562

#567

#57

#575

#578

#579

#58

#580

#581

#582

#583

#585

#586

#587

#588

#59

#590

#595

#596

#6

#60

#600

#605

#608

#609

#61

#611

#616

#617

#618

#619

#62

#621

#623

#624

#626

#63

#630

#633

#634

#635

#639

#64

#642

#643

#647

#648

#659

#66

#661

#664

#665

#666

#67

#670

#671

#678

#68

#684

#689

#698

#7

#70

#704

#708

#71

#712

#713

#715

#719

#72

#721

#723

#725

#727

#73

#733

#737

#738

#740

#741

#743

#745

#746

#748

#75

#750

#76

#761

#762

#767

#768

#770

#773

#783

#785

#789

#791

#793

#794

#795

#797

#798

#799

#8

#803

#805

#806

#809

#810

#812

#82

#820

#821

#822

#823

#829

#831

#836

#838

#84

#842

#848

#85

#851

#852

#853

#854

#858

#86

#860

#862

#867

#868

#87

#872

#88

#881

#884

#886

#889

#89

#892

#893

#898

#9

#90

#900

#901

#905

#906

#91

#910

#911

#918

#921

#93

#930

#932

#935

#94

#941

#947

#95

#950

#951

#953

#954

#957

#958

#96

#963

#964

#966

#968

#97

#971

#977

#981

#986

#989

#990

#993

#994

#999

v0.2.0

v0.2.1

v0.3.0

v0.3.1

v0.3.2

v0.4.0

v0.4.1

v0.4.2

v0.4.3

v0.5.0

v0.6.0

v0.7.0

v0.8.0

v0.8.1

v0.8.2

v0.9.0

v0.9.1

v0.9.2

v0.9.3

v0.9.4

v1.0.0

v1.0.1

v1.0.2

v1.0.3

v1.1.0

v1.1.1

v1.2.0

v1.3.0

v1.3.1

v1.3.2

v1.3.3

v1.3.4

v1.4.0

v1.4.1

v1.4.2

v1.4.3

v1.4.4

v1.4.5

v2.0.0

v2.0.1

v2.0.2

v2.0.3

v2.0.4

v2.1.0

v2.1.1

v2.2.0

v2.3.0

v2.3.1

v2.4.0

v2.4.1

v3.0.0

v3.0.1

v3.0.2

v3.1.0

v3.1.1

v3.2.0

v3.2.1

v3.2.2

v3.2.3

v3.3.0

v3.3.1

v3.3.2

v3.3.3

v3.3.4

v3.3.5

b9dffbd512 python now uses v3 OlivierDehaene 2024-06-03 15:50:37 +0200
188c396b88 continue refactoring OlivierDehaene 2024-06-03 15:18:03 +0200
dc07ad2691 continue refactoring OlivierDehaene 2024-06-03 15:04:27 +0200
ba59da1589 wip OlivierDehaene 2024-06-03 14:23:30 +0200
679c670293 small refactor to make router a bit more agnostic OlivierDehaene 2024-06-03 13:30:46 +0200
4dbb342fe3 small refactor to make router a bit more agnostic OlivierDehaene 2024-06-03 13:30:31 +0200
df71aafdcc router: send the input as chunks to the backend Daniël de Kok 2024-06-03 07:27:22 +0000
e2855617a1 Whatever. Nicolas Patry 2024-06-03 14:35:46 +0000
8fcbbf9e8b Attempt at yaml. Nicolas Patry 2024-06-03 14:33:06 +0000
f3c4d06bae Check temporary. Nicolas Patry 2024-06-03 14:30:35 +0000
f92411a57f router: send the input as chunks to the backend Daniël de Kok 2024-06-03 07:27:22 +0000
d1d724b027

reable xpu, broken by gptq and setuptool upgrade (#1988) Wang, Yi 2024-06-03 22:07:50 +0800
a7c744664c v2.0.1 OlivierDehaene 2024-04-18 17:20:36 +0200
11c16aa64c Upgrading all versions. (#1759) Nicolas Patry 2024-04-18 17:17:40 +0200
918916939f feat: accept list as prompt and use first string (#1702) drbh 2024-04-17 04:41:12 -0400
fea0f2f013 fix: bump clients test base url to llama (#1751) drbh 2024-04-16 16:56:47 -0400
65748c7353 Update response type for /v1/chat/completions and /v1/completions (#1747) Lucain 2024-04-16 19:26:32 +0200
2aad5f89bb feat: improve tools to include name and add tests (#1693) drbh 2024-04-16 09:02:46 -0400
be4417c310 Fixing CI. (#1748) Nicolas Patry 2024-04-15 18:47:36 +0200
903debac22 Revert comment. Nicolas Patry 2024-06-03 10:08:25 +0000
b5d7732922 Fixing tests. Nicolas Patry 2024-06-03 10:06:42 +0000
4de62562a6 Fuse gh action. Nicolas Patry 2024-06-03 10:03:58 +0000
3b082fe5f3 Python 3.9 Nicolas Patry 2024-06-03 09:33:33 +0000
9a59ebcec3 Hotfix GPTQ. Nicolas Patry 2024-06-03 09:32:12 +0000
2eac0951b1 No sccache. Nicolas Patry 2024-06-03 08:59:41 +0000
709d70305d What about /opt ? Morgan Funtowicz 2024-04-30 16:42:01 +0200
ed89e464a4 Let's try /usr/bin for sccache for Intel Morgan Funtowicz 2024-04-30 16:38:31 +0200
70690911cb let's see if we really need sudo for intel Morgan Funtowicz 2024-04-30 16:29:55 +0200
740a032ddb Upgrade all the actions deps Morgan Funtowicz 2024-04-30 16:28:21 +0200
6c0b41c037 Let's try with Python 3.8 instead of 3.9 Morgan Funtowicz 2024-04-30 16:26:58 +0200
f9786a29ba Upgrade Python setup for intel Morgan Funtowicz 2024-04-30 16:25:03 +0200
49b93d8d18 Let's dispatch Intel XPU on the right runner group Morgan Funtowicz 2024-04-30 16:22:33 +0200
00ffe4fae0 OK let's duplicate the job and dispatch on different labels Morgan Funtowicz 2024-04-30 16:19:59 +0200
c0ba3ef92e Enable TGI on XPU tests Morgan Funtowicz 2024-04-30 16:04:40 +0200
1852d107bb remove gptq change Wang, Yi A 2024-06-03 01:45:56 -0700
0b3f71c6f6 Merge branch 'main' into xpu_fix Wang, Yi A 2024-06-03 01:44:38 -0700
9add5d0af5

Fixing GPTQ imports. (#1994) Nicolas Patry 2024-06-03 10:36:29 +0200
2ae9ed20fb

Update server/text_generation_server/layers/gptq/__init__.py Nicolas Patry 2024-06-03 10:36:04 +0200
4188a25b15

Update server/text_generation_server/layers/gptq/__init__.py Nicolas Patry 2024-06-03 10:35:50 +0200
9a9b679c33 Fixing indirect GPTQ loads. Nicolas Patry 2024-06-03 08:20:37 +0000
ff5ca67f58 WIP maintenance/merge-vlm-input-prep Daniël de Kok 2024-05-31 16:14:27 +0000
ebeea9daf8 router: send the input as chunks to the backend Daniël de Kok 2024-05-30 12:31:35 +0000
fc52ba61ab router: send the input as chunks to the backend Daniël de Kok 2024-06-03 07:27:22 +0000
09590956a4 Merge branch 'main' into xpu_fix Wang, Yi A 2024-06-02 18:10:14 -0700
799a193b10 Fixing Phi3. fix_phi3 Nicolas Patry 2024-06-01 08:47:00 +0000
08b3eac2ce

single char ` addition for docs (#1989) Nicholas Broad 2024-05-31 09:42:14 -0700
64a4b88766 Fixing the CLI. Nicolas Patry 2024-05-31 16:06:34 +0000
5ab4cef67e

Fixing exl2 scratch buffer. (#1990) Nicolas Patry 2024-05-31 18:01:43 +0200
06edde9491

Purely refactors paged/attention into layers/attention and make hardware differences more obvious with 1 file per hardware. (#1986) Nicolas Patry 2024-05-31 17:57:01 +0200
b0c168d249

Update server/text_generation_server/layers/attention/xpu.py Nicolas Patry 2024-05-31 17:56:08 +0200
5b58262fea Fixing exl2 scratch buffer. Nicolas Patry 2024-05-31 15:18:44 +0000
37f955dd14

single char ` addition Nicholas Broad 2024-05-31 08:08:59 -0700
9c722a4e35 reable xpu, broken by gptq and setuptool upgrade Wang, Yi A 2024-05-31 07:52:31 -0700
659bd67fec

Update documentation version to 2.0.4 (#1980) fxmarty 2024-05-31 07:03:24 -0700
d44688b6ac Adress comments + fix 2nd path in falcon. Nicolas Patry 2024-05-31 12:43:13 +0000
c67539fbcc

Update server/text_generation_server/utils/import_utils.py Nicolas Patry 2024-05-31 12:51:35 +0200
91f55ea2b5 Removing flash decoding part so it gets merged. Nicolas Patry 2024-05-31 10:16:30 +0000
a4d81d623d update doc fxmarty 2024-05-30 13:38:06 +0200
be87c840b8 Update router/src/infer.rs Nicolas Patry 2024-05-30 11:25:37 +0200
13caf958eb Enabling custom block size schedule. Nicolas Patry 2024-05-30 05:17:00 +0000
cf59593454 Fixing falcon. Nicolas Patry 2024-05-29 18:34:34 +0000
a76e650283 Fix cohere. Nicolas Patry 2024-05-29 17:41:15 +0000
daddd2e90b Revamped all this architecture. Nicolas Patry 2024-05-29 17:36:04 +0000
7890cd66f7 Fixing cohere flash decoding. Nicolas Patry 2024-05-29 16:04:36 +0000
a6f1603525 Missing cohere. Nicolas Patry 2024-05-29 15:46:53 +0000
50d5c08b15 Router logic knows about page size. Nicolas Patry 2024-05-29 15:37:46 +0000
7a29e82629 Fixing non flash decoding llama path. Nicolas Patry 2024-05-29 12:35:32 +0000
6aeb5a73a1 HHachweew Hack to make other models work. Nicolas Patry 2024-05-29 10:52:09 +0000
6bbc843097 Speedup flashdecoding. Nicolas Patry 2024-05-24 16:10:42 +0000
ed96a76d67 REvert changes in modeling. Nicolas Patry 2024-05-24 14:18:00 +0000
be8c14be8b Less intrusive. Nicolas Patry 2024-05-24 14:15:33 +0000
8171747e4f Fix after rebase.. Nicolas Patry 2024-05-23 12:42:19 +0000
4fd3065d9c Using flash decoding Nicolas Patry 2024-05-17 08:43:33 +0000
967ced2ff4 Gemma GPTQ checks: skip logprob checks Daniël de Kok 2024-05-30 07:10:10 +0000
36dd16017c Add support for exl2 quantization Daniël de Kok 2024-05-28 09:51:31 +0000
03699839a4 Gemma GPTQ checks: skip logprob checks Daniël de Kok 2024-05-30 07:10:10 +0000
0e8f8726db

Warmup all decode buckets (#152) Karol Damaszke 2024-05-29 22:46:55 +0200
7b879fd1d8

Pad next token chooser parameters with empty logits processors (#151) Karol Damaszke 2024-05-29 22:43:56 +0200
3fa24fb217 Add support for exl2 quantization Daniël de Kok 2024-05-28 09:51:31 +0000
cbced7f0f9

feat: adjust attn weight loading logic (#1975) drbh 2024-05-29 12:42:11 -0400
3cf4354944 feat: adjust attn weight loading logic drbh 2024-05-29 15:05:57 +0000
129f0ed603 fix: adjust whl names and upload all precompile-kernels-workflow drbh 2024-05-29 09:50:58 -0400
58ac1d7e9b feat: add basic workflow drbh 2024-05-28 22:18:44 -0400
6499b8e213 fix: update workflow trigger again support-pre-compile-kernels drbh 2024-05-28 22:11:21 -0400
4692347140 fix: edit source to build drbh 2024-05-28 22:07:36 -0400
b7161c8308 fix: simplify workflow trigger trigger again drbh 2024-05-28 22:04:12 -0400
ba47345e1b fix: revert changes and change name drbh 2024-05-28 21:59:47 -0400
ca59ef23db fix: force ci to run drbh 2024-05-28 21:54:24 -0400
14088638de feat: precompile kernels drbh 2024-05-28 21:44:21 -0400
1bf32d970f fix: install hf cli before upload pip-installable drbh 2024-05-28 18:04:23 +0000
dab44ac1af feat: upload assets to hub rather than github drbh 2024-05-28 12:03:18 -0400
da1a0b3412 fix: set cuda arch list prior to vllm build drbh 2024-05-27 09:00:34 -0400
ad94f299f4 feat: compile vllm for cuda after flash_attn drbh 2024-05-26 23:21:07 -0400
8253f83034 fix: build kernels inside of repo and move to single dist drbh 2024-05-22 00:34:44 +0000
ec8c638d2b feat: cache wheel as build artifact drbh 2024-05-21 22:46:54 +0000
7765aa6ecd fix: adjust skip build typo drbh 2024-05-21 21:56:29 +0000
814e07dffe fix: build proto in CI and avoid rate limit in client test drbh 2024-05-21 21:43:25 +0000
2ee4b9f77f fix: adjust upload command drbh 2024-05-21 20:29:00 +0000
47e19377cb fix: skip redundant login drbh 2024-05-21 20:04:38 +0000
01e68b5acf feat: upload single pre compile to hub drbh 2024-05-21 19:45:49 +0000