Commit Graph

  • f2ea123ccd Apply suggestions from code review Nicolas Patry 2024-04-22 16:43:28 +0200
  • 0e91e131b8 Update server/text_generation_server/models/custom_modeling/llava_next.py Nicolas Patry 2024-04-22 16:43:09 +0200
  • af08e359af Dummy changes. Nicolas Patry 2024-04-22 14:10:30 +0000
  • b564adc057 Delete server/text_generation_server/models/custom_modeling/idefics2_modeling.py Nicolas Patry 2024-04-22 16:00:36 +0200
  • f2d8c2e76f Fixing features for llava_next. Still issues with warmup and truncation atm. Nicolas Patry 2024-04-22 09:54:51 +0000
  • ae2b4e1c23 Operational. Nicolas Patry 2024-04-19 22:39:30 +0000
  • 613dc93617 Idefics2 in working state. Nicolas Patry 2024-04-19 16:30:16 +0000
  • f68ccfd023 Temporary dump of idefics2. Nicolas Patry 2024-04-17 16:18:08 +0000
  • 9f3ce55ce2 Idefics2. Nicolas Patry 2024-03-26 09:34:13 +0000
  • 9f44af470c Temporary implem of torch.compile on our stuff. Nicolas Patry 2024-03-21 18:56:40 +0000
  • c6fafff7f2 Update server/text_generation_server/utils/logits_process.py drbh 2024-04-22 13:42:15 -0400
  • d969151a1e fix: avoid frequency and repetition penalty on padding tokens drbh 2024-04-19 03:23:30 +0000
  • 87c4828c4e feat: allow null eos and bos tokens in config drbh 2024-04-22 13:58:46 -0400
  • ed72e92126 fix typos in docs and add small clarifications (#1790) Moritz Laurer 2024-04-22 18:15:48 +0200
  • 26b3916612 Make --cuda-graphs work as expected (bis) (#1768) fxmarty 2024-04-22 16:09:19 +0200
  • 22d0c9bba2 add guidance to landing page list of features moritzlaurer 2024-04-22 16:05:36 +0200
  • f04b1b343d Update docs/source/conceptual/guidance.md drbh 2024-04-22 10:01:56 -0400
  • 5a8cabf904 fix small typos in streaming docs moritzlaurer 2024-04-22 15:56:43 +0200
  • f9f23aaf2c add clarifying example for n-gram speculation moritzlaurer 2024-04-22 15:47:49 +0200
  • e041c78de4 remove chat template tokens for consistency with examples above (or add them to all examples) moritzlaurer 2024-04-22 15:12:17 +0200
  • bc4f42ad36 small typos moritzlaurer 2024-04-22 15:10:40 +0200
  • efd4b97d15 v1.4.0 (#1494) OlivierDehaene 2024-01-26 19:04:57 +0100
  • ac580f515b feat: add tokenizer-config-path to launcher args (#1495) drbh 2024-01-26 12:01:33 -0500
  • 4b376b30f1 GPTQ support on ROCm (#1489) fxmarty 2024-01-26 16:27:44 +0100
  • 5d663fb85d Update the docs to include newer models. (#1492) Nicolas Patry 2024-01-26 16:07:31 +0100
  • 5134d9ccc3 fix: launcher doc typos (#1462) Andrés Restrepo 2024-01-26 08:10:07 -0500
  • 9fd5f5150c Trying to fix that flaky test. (#1491) Nicolas Patry 2024-01-26 14:06:27 +0100
  • b064b33e8b Add sealion mpt support (#1477) Nicolas Patry 2024-01-26 14:05:02 +0100
  • ea2aa53805 Reinstate exl2 with tp (#1490) Nicolas Patry 2024-01-26 14:00:29 +0100
  • 82f20c4788 fix: launcher doc typos (#1473) Nicolas Patry 2024-01-26 10:41:58 +0100
  • 41fbf5c254 fix: show warning with tokenizer config parsing error (#1488) drbh 2024-01-26 04:41:39 -0500
  • a1124f7b8b Update the docs Nicolas Patry 2024-01-26 10:13:23 +0100
  • ac0be8a6a4 fix: read stderr in download (#1486) OlivierDehaene 2024-01-25 18:16:03 +0100
  • b2fc097b2b feat: adds phi model (#1442) drbh 2024-01-25 09:37:53 -0500
  • be9bfae18c Add a new /tokenize route to get the tokenized input (#1471) Nicolas Patry 2024-01-25 14:19:03 +0100
  • ae222cce6e Add messages api compatibility docs (#1478) drbh 2024-01-24 11:41:28 -0500
  • 2a3a9c526b Fixing non divisible embeddings. (#1476) Nicolas Patry 2024-01-24 13:08:41 +0100
  • 1b99d4c0b6 Disable decoder_input_details on OpenAI-compatible chat streaming, pass temp and top-k from API (#1470) Jacob Keisling 2024-01-23 08:55:05 -0600
  • 5836a1cc69 feat: conditionally toggle chat on invocations route (#1454) drbh 2024-01-22 10:29:01 -0500
  • 935ee00749 chore: bump rust version and annotate/fix all clippy warnings (#1455) drbh 2024-01-22 09:22:54 -0500
  • 77afb882dc feat: support raise_exception, bos and eos tokens (#1450) drbh 2024-01-18 06:31:56 -0500
  • 76b226b00c feat: supports openai chat completions API (#1427) drbh 2024-01-16 05:07:41 -0500
  • 12cfc7930b Return prompt vs generated tokens. (#1436) Nicolas Patry 2024-01-11 19:01:43 +0100
  • e930ad9cec Fix local load for Medusa (#1420) PYNing 2024-01-11 01:36:20 +0800
  • af63e3273f fix: follow base model for tokenizer in router (#1424) OlivierDehaene 2024-01-10 16:35:54 +0100
  • 92ddb41d95 Fix missing make target platform for local install: 'install-flash-attention-v2' (#1414) R. P. Ruiz 2024-01-09 10:19:31 -0500
  • 118344b99d fix: fix local loading for .bin models (#1419) OlivierDehaene 2024-01-09 15:21:00 +0100
  • fc9173aa59 docs: update required CUDA version to 12.2 OlivierDehaene 2024-01-09 14:28:55 +0100
  • 62646c2a54 v1.3.4 OlivierDehaene 2023-12-22 15:46:04 +0100
  • 8cc4306f72 Fix local load for peft (#1373) Nicolas Patry 2023-12-21 17:29:23 +0100
  • 7eeabb9cda feat: update exllamav2 kernels (#1370) OlivierDehaene 2023-12-21 17:25:22 +0100
  • 3e22ad985e docs: Change URL for Habana Gaudi support in doc (#1343) regisss 2023-12-21 11:05:35 +0100
  • be05972911 Peft safetensors. (#1364) Nicolas Patry 2023-12-20 15:37:14 +0100
  • 75f954df6c ensure aiohttp session exists Sabidao 2024-04-21 18:10:28 +0300
  • 3116fb5113 Merge branch 'huggingface:main' into main Sabidao 2024-04-21 17:53:17 +0300
  • aef931ea5d fix fa2 triton kernel not working with MQA/GQA fxmarty 2024-04-20 21:16:11 +0000
  • 325f9774fe reenable _custom_C.LLMM1 as the culprit was FA2 triton fxmarty 2024-04-19 16:19:47 +0000
  • 81c27ba9c2 disable _custom_C.LLMM1 as it is broken for TP>=2 fxmarty 2024-04-19 15:59:31 +0000
  • 562cd4b06e fix fxmarty 2024-04-19 15:44:32 +0000
  • 6d59eb2e70 revert dev only changes fxmarty 2024-04-19 15:43:28 +0000
  • 885ce3354f User argument should be gospel and never ignored. fix_default_arg Nicolas Patry 2024-04-19 16:47:08 +0200
  • 26ba2d50a3 feat: add how it works section drbh 2024-04-19 14:42:34 +0000
  • 8eacae014f add missing files fxmarty 2024-04-19 13:46:54 +0000
  • b7299e1b7f fix: fix gpt-q with groupsize = -1 (#1358) OlivierDehaene 2023-12-18 16:07:05 +0100
  • ec5343ec5e cleaning fxmarty 2024-04-19 11:57:16 +0000
  • 5ff9e81952 fix: fix offline (#1341) (#1347) OlivierDehaene 2023-12-18 10:20:08 +0100
  • ecb0db45af fix: fix logic if sliding window key is not present in config (#1352) OlivierDehaene 2023-12-15 14:56:17 +0100
  • a95e6d603d feat: relax mistral requirements (#1351) OlivierDehaene 2023-12-15 12:52:24 +0100
  • 1b4c8b4b3e _custom_C.LLMM1 and HIP_FORCE_DEV_KERNARG=1 fxmarty 2024-04-19 11:50:01 +0000
  • f723e5ccb5 working fxmarty 2024-04-19 11:23:27 +0000
  • 3600fc9dbe v1.3.3 OlivierDehaene 2023-12-15 01:20:42 +0100
  • bb6200503c fix: max_past default value must be -1, not 0 (#1348) OlivierDehaene 2023-12-15 01:18:39 +0100
  • 214ec0eb49 fix: only keep stop sequence buffer if we have some OlivierDehaene 2023-12-14 17:04:58 +0100
  • 04dbf7a506 fix: slice stopping criteria buffer OlivierDehaene 2023-12-14 17:01:43 +0100
  • b3c2d7291e fix: fix quant linear autotune OlivierDehaene 2023-12-14 16:45:47 +0100
  • 28fcdcca6d fix: fix triton OutOfResources import OlivierDehaene 2023-12-14 16:04:26 +0100
  • 0ca83be883 WIP debug Triton FA2 fxmarty 2024-04-19 11:11:26 +0000
  • 5c9ef069ed feat: add more latency metrics in forward (#1346) OlivierDehaene 2023-12-14 15:59:38 +0100
  • 47e522a66a wip fa2 triton & fix cudagraph bug fxmarty 2024-04-19 10:11:39 +0000
  • 804068c207 now working fxmarty 2024-04-19 12:08:39 +0200
  • 24d43c487e fix typo fxmarty 2024-04-19 11:49:39 +0200
  • c974437ba7 fix: fix gpt-q params loading OlivierDehaene 2023-12-14 11:02:16 +0100
  • b503b3de60 tunableop in warmup fxmarty 2024-04-19 09:09:16 +0000
  • 2f88d8dfb3 fix: default max_new_tokens to 100 OlivierDehaene 2023-12-13 09:19:19 +0100
  • 3016e1595f at last working! fxmarty 2024-04-18 23:31:28 +0000
  • 2aa7e073bc Update guidance.md to reflect grammar support dr3s 2024-04-18 16:58:07 -0400
  • d769f56c0a Added reference to TPU support Brandon Royal 2024-04-18 15:29:50 -0400
  • e6259d9fc0 fix: reset grammar state when generation stops fix-grammar-cleanup-bug drbh 2024-04-18 17:05:52 +0000
  • 2d0a7173d4 v2.0.1 v2.0.1 OlivierDehaene 2024-04-18 17:20:36 +0200
  • f9ee2c41b9 Upgrading all versions. (#1759) Nicolas Patry 2024-04-18 17:17:40 +0200
  • 90977e9291 export requirements, fix rocm and update openapi OlivierDehaene 2024-04-18 16:53:05 +0200
  • 05f8c85a8b v1.3.2 OlivierDehaene 2023-12-12 18:10:22 +0100
  • f9b58ac7a1 feat: add quant to mixtral (#1337) OlivierDehaene 2023-12-12 17:55:03 +0100
  • 09c556dbd7 v1.3.1 OlivierDehaene 2023-12-11 16:46:44 +0100
  • db5053fc86 v1.3.0 OlivierDehaene 2023-12-11 14:55:03 +0100
  • 79f268f95a chore: formatting OlivierDehaene 2023-12-11 14:49:52 +0100
  • 9aef902982 feat: mixtral (#1328) OlivierDehaene 2023-12-11 14:43:40 +0100
  • a7f52f3812 Speculative (#1308) Nicolas Patry 2023-12-11 12:46:30 +0100
  • 6e4d0feb47 Upgrading all versions. Nicolas Patry 2024-04-18 10:42:27 +0200
  • a41c1a6bc7 Add a stale bot. (#1313) Nicolas Patry 2023-12-05 14:42:55 +0100