Commit Graph

  • f36c9a68ae
    refine the code according to the review command Wang, Yi A 2024-10-14 21:01:54 -0700
  • 645369bef7
    set kv cache dtype Wang, Yi A 2024-10-08 08:00:06 -0400
  • dd3fb81719
    fix ci failure Wang, Yi A 2024-09-09 23:54:55 -0700
  • 61fe28e8f7
    add gptq and awq int4 support in intel platform Wang, Yi A 2024-08-21 22:47:34 -0700
  • 46b14e6b28
    Remove all references to habana_quantization_toolkit for 1.18 (#229) Thanaji Rao Thakkalapelli 2024-10-18 01:59:59 -0700
  • 21c13ff3a6
    Remove References to torch compile mode in readme (#236) Thanaji Rao Thakkalapelli 2024-10-17 14:07:51 -0700
  • 8ec57558cd
    Break cycle between the attention implementations and KV cache (#2627) Daniël de Kok 2024-10-17 14:54:22 +0200
  • 5f32dea1e2
    fix: prefer inplace softmax to avoid copy (#2661) drbh 2024-10-17 08:49:02 -0400
  • 3e0a82d512
    Update server/text_generation_server/models/flash_causal_lm.py drbh 2024-10-17 08:48:52 -0400
  • 90553c1dd4
    Break cycle between the attention implementations and KV cache Daniël de Kok 2024-10-09 08:32:04 +0000
  • 1b97e084bf
    fix tgi-entrypoint wrapper in docker file: exec instead of spawning a child process (#2663) oOraph 2024-10-17 11:15:26 +0200
  • 13fe82264b
    Fixing "deadlock" when python prompts for trust_remote_code by always specifiying a value. Nicolas Patry 2024-10-17 10:58:07 +0200
  • 59ea38cbca
    Simplify the attention function (#2609) Daniël de Kok 2024-10-17 10:42:52 +0200
  • 5bbe1ce028
    Support e4m3fn KV cache (#2655) Daniël de Kok 2024-10-17 10:42:16 +0200
  • b240fd139a
    tgi-entrypoint: exec instead of spawning a child process Raphael Glon 2024-10-10 18:26:20 +0200
  • 7822bfd68f
    Fixup flashinfer support Daniël de Kok 2024-10-17 07:56:51 +0000
  • 8d7448de9f
    fix: prefer inplace softmax to avoid copy David Holtz 2024-10-17 02:53:32 +0000
  • 2326f2b875
    Remove References to torch compile mode in readme Thanaji 2024-10-16 22:45:26 +0300
  • 751f1bb815
    Make check more obvious Daniël de Kok 2024-10-16 13:54:57 +0000
  • 07128cc178
    Simplify the attention function Daniël de Kok 2024-10-04 09:42:20 +0000
  • a6a0c97ed9
    feat: prefill chunking (#2600) OlivierDehaene 2024-10-16 12:49:33 +0200
  • 8ae5d4c7d6
    Ignore EOS for benchmark by using TEXT_GENERATION_SERVER_IGNORE_EOS_TOKEN (#234) Sun Choi 2024-10-16 02:57:36 -0700
  • 812aa1c01d
    Fix env name Nicolas Patry 2024-10-16 11:18:22 +0200
  • 52eaa1f4d8
    Put back non default simple tests. Nicolas Patry 2024-10-16 11:17:53 +0200
  • ff36b2fb39
    Add simple resolution when user specifies ATTENTION=paged. Nicolas Patry 2024-10-16 10:56:58 +0200
  • 5c72f269b6
    Fix prefix_caching variable, remove defaults in server (confusing a lot of the times). Nicolas Patry 2024-10-16 10:46:03 +0200
  • 594a2b4a3d
    rename OlivierDehaene 2024-10-16 10:23:21 +0200
  • 704a58c807
    Fp8 e4m3_fnuz support for rocm (#2588) Mohit Sharma 2024-10-16 13:24:50 +0530
  • aa92e451a0
    Support e4m3fn KV cache Daniël de Kok 2024-10-16 07:48:10 +0000
  • d07e7f4f62
    Merge pull request #233 from huggingface/fix_sysntax Mandy Li 2024-10-15 14:33:21 -0700
  • 87a1cee32c
    Fix sysntax error in PR 232 Thanaji Rao Thakkalapelli 2024-10-15 13:23:48 -0700
  • e06320f64e
    Enabling Flash Attention support for falcon model (#232) Thanaji Rao Thakkalapelli 2024-10-15 10:50:17 -0700
  • fc41f0784a
    lint fix. Nicolas Patry 2024-10-15 18:46:56 +0200
  • 5c8c5ac81a
    Merge branch 'main' into feat/prefix_chunking Nicolas Patry 2024-10-15 18:28:27 +0200
  • ffe05ccd05
    Rollback to ChatRequest for Vertex AI Chat instead of VertexChat (#2651) Alvaro Bartolome 2024-10-15 18:11:59 +0200
  • fa491e730b
    Fixing dtype + AMD, Ipex targets. Nicolas Patry 2024-10-15 17:56:03 +0200
  • b3917ff695
    fix: add limit to internal stream function too adjust-where-request-max-tokens-is-defaulted David Holtz 2024-10-15 15:14:04 +0000
  • 595640e35c
    fix: enforce default max request tokens in generate_internal David Holtz 2024-10-15 15:08:23 +0000
  • cebd1b47f5
    Rollback to ChatRequest for Vertex AI Chat instead of VertexChat Alvaro Bartolome 2024-10-15 16:41:05 +0200
  • 4fa4da3cb6
    Fixing non blocked attentions Nicolas Patry 2024-10-15 16:12:00 +0200
  • fb4d2080af
    Merge branch 'main' into cpu_perf Wang, Yi 2024-10-15 21:50:15 +0800
  • ce7e356561
    Use flashinfer for Gemma 2. Daniël de Kok 2024-10-15 13:49:32 +0000
  • 689aa26db2
    (nit) improved comment Mohit Sharma 2024-10-15 12:16:03 +0000
  • 1de96279e3
    (review_comments) fix typo and added comments Mohit Sharma 2024-10-15 12:01:12 +0000
  • cf04a43fb1
    Fixing linters. (#2650) Nicolas Patry 2024-10-15 12:43:49 +0200
  • 39b86f7f16
    Fixing linters. Nicolas Patry 2024-10-15 12:26:32 +0200
  • b2b5024ec8
    (bug) update all has_tensor Mohit Sharma 2024-10-15 07:51:03 +0000
  • 64b0337574
    feature: get trace id from req headers kozistr 2024-10-15 15:14:20 +0900
  • 7ca47777aa
    Update mod.rs smith518 2024-10-15 11:22:34 +0530
  • b069d2c131
    refine the code according to the review command Wang, Yi A 2024-10-14 21:01:54 -0700
  • 7c6230c59a
    Merge branch 'main' into gpt_awq_4 Wang, Yi A 2024-10-14 20:28:15 -0700
  • 58848cb471
    feat: enable pytorch xpu support for non-attention models (#2561) Dmitry Rogozhkin 2024-10-14 09:28:49 -0700
  • 7a82ddcbd0
    update ipex to fix incorrect output of mllama in cpu (#2640) Wang, Yi 2024-10-14 22:32:33 +0800
  • 51f5401893
    Clarify gated description and quicktour (#2631) Omar Sanseviero 2024-10-14 16:31:37 +0200
  • 09d73e56ca
    remove docker entrypoint Raphael Glon 2024-10-14 15:52:09 +0200
  • c9e0f36dbc
    Machete WIP feature/machete Daniël de Kok 2024-10-14 07:59:09 +0000
  • 3ea82d008c
    Cpu perf (#2596) Nicolas Patry 2024-10-14 15:34:08 +0200
  • ce28ee88d5
    Small fixes for supported models (#2471) Omar Sanseviero 2024-10-14 15:26:39 +0200
  • 406725e05f
    Updating the doc (we keep the list actually). Nicolas Patry 2024-10-14 15:19:02 +0200
  • 7a7cd5f299
    (review comments) Fix compression_config load, type hints Mohit Sharma 2024-10-14 11:51:11 +0000
  • 0578bd917d
    Fix gpt_bigcode/starcoderbase-3b accuracy issue (#228) Sun Choi 2024-10-14 01:01:55 -0700
  • af546505ad
    add gfx1100 support to AMD pytorch build Drew Paettie 2024-10-12 22:55:49 -0700
  • 4be95899ca
    update ipex to fix incorrect output of mllama in cpu Wang, Yi A 2024-10-12 18:49:59 -0700
  • 0c478846c5
    Fixing intel Supports windowing. (#2637) Nicolas Patry 2024-10-11 21:47:03 +0200
  • fe2f251504
    Fixing intel Supports windowing. Nicolas Patry 2024-10-11 21:28:22 +0200
  • 5e70158b2c
    remove support chunking for paged OlivierDehaene 2024-10-11 15:19:14 +0200
  • b392362e9e
    direct return in clamp like rocm Wang, Yi A 2024-10-10 23:02:56 -0700
  • f213012b08
    Merge branch 'main' into sliding_window Wang, Yi A 2024-10-10 22:58:27 -0700
  • 05d68ae5c2
    add tests Linus Bierhoff 2024-10-10 19:41:51 +0200
  • 2285b0d63e
    add OpenAI like tool_choice for named choice Linus Bierhoff 2024-10-10 18:50:32 +0200
  • f18a460181
    propagate signal from entrypoint to tgi Raphael Glon 2024-10-10 18:26:20 +0200
  • df98299919
    fix cargo tests OlivierDehaene 2024-10-10 16:54:42 +0200
  • 3dbdf63ec5
    Intel ci (#2630) Nicolas Patry 2024-10-10 16:51:57 +0200
  • f923a3fb68
    fix mllama OlivierDehaene 2024-10-10 16:01:18 +0200
  • b7a1280f25
    fix tests OlivierDehaene 2024-10-10 14:52:09 +0200
  • f85a308ef1
    remove debugging lines OlivierDehaene 2024-10-09 20:05:39 +0200
  • d361197aab
    omfg OlivierDehaene 2024-10-09 20:04:06 +0200
  • d73c5c634d
    max input length OlivierDehaene 2024-10-09 19:39:14 +0200
  • 57f55fe834
    idk at this point OlivierDehaene 2024-10-09 19:17:18 +0200
  • 3ace1b2f8d
    fix logprobs? OlivierDehaene 2024-10-09 17:33:15 +0200
  • 08953c5975
    fix launcher OlivierDehaene 2024-10-08 19:23:45 +0200
  • ea4b739a9f
    fix prefill logprobs OlivierDehaene 2024-10-07 17:12:31 +0200
  • 3924b87a04
    rename to cache and input lengths OlivierDehaene 2024-10-07 15:14:03 +0200
  • 8188deac22
    fix vlm and seq2seq OlivierDehaene 2024-10-07 15:08:30 +0200
  • 460e830444
    fix benchmarker OlivierDehaene 2024-10-07 14:45:52 +0200
  • 4ddea01c6e
    remove log OlivierDehaene 2024-10-07 12:11:50 +0200
  • c8a033b636
    feedback loop OlivierDehaene 2024-10-07 12:02:25 +0200
  • ff4155dfea
    fix slot_filtering_indices OlivierDehaene 2024-10-02 19:16:36 +0200
  • b49978ff67
    re-create slots OlivierDehaene 2024-10-02 14:17:26 +0200
  • 4db5e7dde6
    re-create slots OlivierDehaene 2024-10-02 14:10:33 +0200
  • 7f9abde3f8
    load tested OlivierDehaene 2024-10-02 12:59:44 +0200
  • 34f5dc525e
    working OlivierDehaene 2024-10-01 09:51:34 +0200
  • 173bc99ab3
    add prepare_for_prefill OlivierDehaene 2024-09-30 17:58:14 +0200
  • 0e31619893
    current OlivierDehaene 2024-09-30 11:03:13 +0200
  • 962ccfd5b7
    wip, no filter, no concat OlivierDehaene 2024-09-26 17:10:00 +0200
  • a85f5ebecd
    fix filter and concat OlivierDehaene 2024-09-25 15:34:08 +0200
  • e4f9110e14
    maybe patching vlms? OlivierDehaene 2024-09-25 14:54:59 +0200
  • 838756eb18
    refactor to use prefix/postfix namming + fix all_input_ids_tensor OlivierDehaene 2024-09-25 14:40:47 +0200
  • de043b53c4
    rollback OlivierDehaene 2024-09-25 13:57:18 +0200
  • 7169cbae6d
    wip OlivierDehaene 2024-09-20 14:25:51 +0200