text-generation-inference

huggingface/text-generation-inference

Fork 0

mirror of https://github.com/huggingface/text-generation-inference.git synced 2025-09-11 12:24:53 +00:00

Commit Graph

Select branches

Hide Pull Requests

20250708-ci-fixes

add-chat-response-format

add-google-cloud-provider

add-quickstart-script

add-rotary-embed-tests

add-small-ttft-script

add-test-for-warmup-and-kvcache

add_L4

add_api_key

add_batch_dimension

add_chunked_atn

add_chunked_attn

add_deepseekv3

add_gptq_docs

add_integration_test

add_readme_dashboard

add_tunable_prefill

add_vlm_chunking

adding_docs

adjust-mllama-test-output

adjust-where-request-max-tokens-is-defaulted

aiter_kernels

amd-ci-fx

auto_length

automodel-supports-flash-paged-attention

avoid-cuda-graph-during-warmup-if-oom

avoid-zero-seed

backends/trtllm

backends/trtllm-executor

baichuan2-13b

bnb4

bugfix/add_tools_prompt

bugfix/moe-kernels-imports

bugfix/phi-exl2

bump-client-0.6.2

bump-kernel-versions

bump-poetry-and-requirements

chunked_attn_l4

ci-amihalik-update-chat-completion-messages

ci-new-cluster

ci-patch

ci-run-openai-function-calling-compatible-support

ci-update_xpu_image

ci-xpu

ci-xpu2

ci2

ci_amd

ci_amd2

ci_amd3

ci_amd4

close_dl_thread

compat_logger

cuda_ipc_allreduce

debug-gpt2

debug-request-id

debug-torch-23

debug/gemma2

debugging-timeouts

deploy/aml

dev

development-guide

dummy

enable-non-grammar-constrained-tools

enable-qwen2vl-video

enable-transformers-vlm

enable_non_divisible_embeddings

exl2

experiment/moe

explore-static-triton-kernels

explore-t4-gemma-issues

feat-backend-llamacpp

feat/add-load-test

feat/attention_sinks

feat/backend_abstraction

feat/backend_feature

feat/better_tokens

feat/cuda_12

feat/flash_decoding

feat/improve_max_tokens

feat/max_queue_size

feat/page_re_alloc

feat/parse_logs

feat/support_deepspeed

feature/machete

feature/moe-kernels

feature/no_repeat_ngram_size

feature/no_repeat_ngram_size_ci

feature/phi-3-small

feature/prefix

feature/radix-prefix-cache

feature/radix-prefix-cache-bench

feature/vlm-prefix-caching

fix-cudagraph-bug

fix-gemma-tokenization

fix-grammar-cleanup-bug

fix-grammar-fsm-batching

fix-mixtral-adapter-loading

fix-release-tests

fix-repack-for-marlin

fix-tool-call-def

fix-tp

fix-version-install

fix/allow-top-p-0

fix/avoid_record_streams

fix/op-trace-id

fix/parse-mamba-config

fix_default_arg

fix_exl2

fix_fp8_llama3.2

fix_leak

fix_mistral2

fix_neox_rotary_emb

fix_phi3

flashinfer

flashinfer-0.2.5

fp8_kvcache

fp8_rocm

gaudi_llama4_tmp

gha_sccache_use_secrets

git_2.0.4

git_3.1.1

git_3.2.0

git_3.2.1

git_3.3.0

git_3.3.1

git_3.3.2

git_v2.1.0

git_v2.1.1

git_v2.2.0

git_v2.3.0

git_v2.3.1

git_v2.4.0

git_v2.4.1

git_v3.0.0

git_v3.0.1

git_v3.0.2

git_v3.1.0

git_v3.2.2

git_v3.2.3

git_v3.3.3

git_v3.3.4

git_v3.3.5

improve-docs

improve-dynamic-message-content

improve-json-schema-field

improve-tool-call-and-response-ids

improve_defaults

improve_launcher_defaults

inlcude-latest-release-on-commit-builds-tags

ipex-moe

kvrouter

kvrouter-endpoints

llama-fused-compiled-mlp

main

maintenance/docker-network

maintenance/merge-vlm-input-prep

mamba2

martinigoyanes-fix-frequency-penalty

medusa

megatron

message-more-info

mi300-temp

mllama

model_compat_log

more_logs

multi-lora

new_minor_version

nix/cargo-clippy

nix/docker2

nix/pytorch-2.5.1

nix_integration_tests

nix_test2

no_root_user

no_root_user2

op-compilation-benchmarking

origin/slind_window_fix

osanseviero-patch-1

patch_version_3.3.6

pip-installable

pr-1869-ci-run

pr-2076-ci-run

pr-2290-ci-runner

pr-2366-ci-branch

pr-2444-ci-branch

pr-2517-ci-branch

pr-2711-ci-branch

pr-2784-ci-branch

pr-2840-ci-branch

pr-2954-ci-branch

pr-3002-ci-branch

pr-3004-ci-branch

pr-3018-ci-branch

precompile-kernels-workflow

prefix_chunk

prefix_default

proxy_sse_engine_state

quantization

quantization-0.1

refactor-lora-linear

release-3.2.4

remove_post_load_weights

response-header-metrics

revert

rocm-ci-build

rocm_6.2_fixes

router-grammar-compile

s3-cache

self-generating-docs

set-num-blocks

simpler_exllama

skip-mistral-test

speculative

streaming_conceptual

support-granite-vision

support-logit-bias-in-chat

support-phi-model

support-phi3-small

support-pre-compile-kernels

temp_work

test-batch-speedup-amount

test_docs

test_rocm

tmp_invariants

tmp_medusa

tmp_torch_compile

transformers-ci

triton_fix

trtllm-stop-words

trtllm/executor_stats

tuna

update-jsonschema

update_docs2

update_internal_version

update_peft

update_readme

upgrade-outlines

upgrade_mlp_speculator

use_g6

use_updated_kernels

vllm/setup

zstd

#1

#100

#101

#1010

#1018

#1019

#102

#1022

#1023

#1024

#103

#1033

#1034

#1042

#1044

#1045

#1048

#1049

#1052

#1054

#1058

#1059

#106

#1060

#1061

#1063

#1064

#1065

#1066

#1068

#107

#1070

#1071

#1075

#1076

#1077

#108

#1080

#1081

#1089

#109

#1090

#1091

#1092

#1094

#1096

#1097

#1099

#11

#110

#1100

#1101

#1102

#1103

#1105

#1110

#1112

#1116

#1123

#1128

#1134

#114

#1140

#1141

#115

#1153

#1155

#116

#1165

#1165

#117

#1173

#1176

#1178

#1179

#118

#1182

#1183

#1184

#1187

#119

#1198

#1202

#1211

#1214

#1219

#122

#1224

#1228

#123

#1239

#1241

#1242

#1243

#1246

#1252

#126

#1260

#1267

#1270

#1272

#1274

#1276

#1279

#128

#1285

#1287

#129

#1294

#1295

#13

#130

#1301

#1305

#1307

#1308

#1313

#132

#1326

#1328

#133

#1336

#1337

#134

#1341

#1343

#1346

#1347

#1348

#135

#1351

#1352

#1353

#1358

#136

#1361

#1364

#137

#1370

#1373

#138

#1381

#1386

#139

#1390

#1395

#14

#140

#1408

#141

#1414

#1419

#142

#1420

#1424

#1425

#1427

#1428

#143

#1436

#144

#1442

#1448

#145

#1450

#1453

#1454

#1455

#1459

#1461

#1462

#1463

#1469

#147

#1470

#1471

#1473

#1475

#1476

#1477

#1478

#148

#1480

#1484

#1486

#1488

#1489

#149

#1490

#1491

#1492

#1494

#1495

#1496

#1497

#1498

#15

#150

#1502

#1504

#1505

#1506

#151

#1511

#1512

#1514

#1515

#1516

#1517

#1518

#152

#1520

#1523

#1524

#1526

#1527

#153

#1532

#1533

#1537

#1539

#154

#1540

#1541

#1542

#1543

#155

#1550

#1552

#1555

#1556

#1557

#1560

#1563

#1564

#1567

#1568

#1569

#1570

#1571

#1576

#1577

#1578

#1579

#1580

#1583

#1584

#1585

#1586

#1587

#1588

#159

#1591

#1592

#1594

#16

#160

#1603

#1605

#1606

#1607

#1608

#1609

#161

#1610

#1614

#1617

#1618

#1619

#162

#1621

#1626

#1628

#163

#1632

#1637

#1638

#1639

#164

#1646

#1648

#1650

#1651

#1653

#1658

#1660

#1662

#1663

#1664

#1666

#1667

#1668

#167

#1676

#168

#1682

#1685

#1686

#1693

#1697

#1698

#17

#170

#1702

#1703

#1704

#1707

#1708

#1709

#1710

#1713

#1714

#1715

#1716

#1718

#1719

#1726

#1727

#1729

#173

#1730

#1731

#1734

#1735

#1736

#1737

#1739

#174

#1740

#1747

#1748

#1749

#175

#1751

#1755

#1756

#1758

#1759

#1760

#1764

#1765

#1767

#1768

#1773

#1774

#1775

#178

#1784

#1789

#179

#1790

#1791

#1797

#1798

#18

#180

#1800

#1801

#1808

#181

#1811

#1812

#1813

#1815

#1816

#1817

#1818

#1820

#1825

#1827

#1828

#1829

#183

#1830

#1832

#1833

#1835

#1836

#1839

#184

#1840

#1841

#1843

#1844

#1845

#1848

#1849

#185

#1850

#1851

#1854

#1855

#186

#1860

#1865

#1866

#1869

#187

#1882

#1884

#1886

#1888

#1889

#1890

#1892

#1894

#1895

#1898

#19

#190

#1902

#1906

#1908

#1909

#191

#1910

#1912

#1915

#1916

#1917

#1918

#1919

#1920

#1921

#1923

#1924

#1925

#1929

#193

#1931

#1932

#1934

#1935

#1936

#1937

#1938

#1939

#194

#1940

#1942

#1947

#1948

#1949

#1950

#1951

#1953

#1954

#1958

#1959

#196

#1963

#1965

#1967

#1970

#1971

#1975

#1980

#1981

#1985

#1986

#1988

#1989

#1990

#1994

#1995

#1996

#2

#20

#200

#2002

#2003

#2004

#2005

#2006

#2008

#201

#2010

#2011

#2013

#2014

#2015

#2016

#2017

#202

#2020

#2021

#2022

#2023

#2024

#2028

#203

#2031

#2032

#2033

#2034

#2038

#2039

#2044

#2045

#2046

#2047

#2049

#205

#2050

#2052

#2054

#2059

#2060

#2061

#2062

#2063

#2065

#2066

#2068

#207

#2071

#2072

#2074

#2075

#2076

#2078

#2079

#208

#2080

#2084

#2085

#2086

#2087

#2088

#2089

#2091

#2092

#2094

#2095

#2097

#2099

#210

#2101

#2102

#2103

#2104

#2105

#2109

#2110

#2111

#2114

#2115

#2116

#2117

#2118

#2119

#212

#2120

#2123

#2124

#2125

#2127

#2128

#2129

#213

#2131

#2132

#2133

#2134

#2135

#2137

#2138

#214

#2140

#2141

#2142

#2148

#2149

#215

#2151

#2152

#2153

#2155

#2156

#2158

#216

#2161

#2163

#2164

#2165

#2166

#2167

#2168

#2169

#217

#2170

#2173

#2175

#2176

#2178

#2179

#218

#2180

#2181

#2182

#2186

#2187

#2189

#219

#2190

#2191

#2193

#2194

#2196

#22

#220

#2201

#2202

#2203

#2204

#2205

#2208

#2209

#221

#2212

#2213

#2215

#2216

#2217

#222

#2220

#2221

#2224

#2225

#2226

#2228

#2230

#2231

#2232

#2233

#2237

#2242

#2243

#2244

#2245

#2248

#2249

#2250

#2251

#2254

#2255

#2256

#2257

#2258

#2259

#226

#2260

#2261

#2262

#2266

#2268

#2269

#227

#2271

#2272

#2273

#2274

#2276

#2277

#2278

#2279

#228

#2281

#2282

#2283

#2284

#2285

#2286

#2287

#2288

#2289

#2290

#2291

#2292

#2298

#2299

#23

#2300

#2303

#2304

#2306

#2307

#2308

#2309

#2311

#2311

#2313

#2314

#2315

#2317

#2320

#2323

#2325

#2326

#2327

#2328

#2329

#233

#2330

#2331

#2333

#2335

#2336

#2337

#2338

#2339

#234

#2341

#2342

#2343

#2344

#2345

#2346

#2347

#235

#2350

#2351

#2352

#2353

#2354

#2357

#2358

#2359

#2360

#2361

#2364

#2365

#2366

#2367

#2368

#237

#2370

#2371

#2372

#2374

#2377

#2378

#2379

#2381

#2382

#2384

#2385

#2386

#2387

#2389

#2390

#2391

#2392

#2394

#2395

#2396

#2397

#2398

#2399

#24

#2401

#2402

#2403

#2404

#2405

#2406

#2407

#2408

#2410

#2411

#2412

#2414

#2415

#2416

#2419

#242

#2420

#2422

#2423

#2424

#2426

#2427

#2428

#2429

#2430

#2431

#2433

#2437

#2438

#2439

#244

#2442

#2443

#2444

#2449

#2450

#2451

#2453

#2454

#2455

#2459

#246

#2462

#2463

#2468

#2469

#2470

#2471

#2472

#2473

#2477

#2478

#2479

#248

#2481

#2482

#2484

#2486

#2489

#2490

#2491

#2492

#2493

#2494

#2496

#2497

#2498

#2499

#25

#250

#2500

#2501

#2507

#251

#2510

#2511

#2512

#2513

#2514

#2515

#2516

#2517

#2518

#2519

#252

#2520

#2521

#2524

#2525

#2527

#2528

#2529

#2532

#2533

#2535

#2536

#2537

#2538

#2539

#2540

#2545

#2546

#2547

#2548

#255

#2550

#2551

#2552

#2553

#2554

#2555

#2556

#2557

#2558

#2561

#2562

#2563

#2566

#2567

#2568

#257

#2574

#2575

#2577

#2578

#2579

#258

#2580

#2582

#2585

#2586

#2587

#2588

#2589

#259

#2590

#2591

#2592

#2594

#2595

#2596

#2597

#2599

#26

#2600

#2601

#2602

#2603

#2604

#2605

#2606

#2607

#2609

#261

#2610

#2611

#2612

#2614

#2616

#2617

#2619

#262

#2620

#2622

#2623

#2625

#2627

#2628

#2629

#2630

#2631

#2632

#2633

#2634

#2637

#264

#2640

#2642

#2642

#2645

#2646

#2647

#2648

#2650

#2651

#2652

#2655

#2658

#2659

#266

#2661

#2663

#2664

#2665

#2666

#2668

#267

#2673

#2674

#2677

#2678

#2680

#2682

#2683

#2684

#2685

#2686

#2687

#2688

#2689

#269

#2690

#2691

#2692

#2693

#2694

#2695

#2697

#2698

#2699

#27

#2701

#2702

#2704

#2706

#2707

#2708

#2709

#2710

#2711

#2712

#2713

#2714

#2716

#2717

#2718

#2719

#272

#2720

#2721

#2723

#2724

#2725

#2726

#2732

#2733

#2738

#274

#2740

#2741

#2742

#2743

#2745

#2746

#275

#2750

#2751

#2753

#2754

#2755

#2756

#2756

#2758

#276

#2760

#2761

#2762

#2764

#2765

#2766

#2767

#2768

#2769

#277

#2770

#2771

#2772

#2773

#2774

#2777

#2778

#2779

#278

#2782

#2784

#2785

#2786

#2788

#2789

#2790

#2791

#2792

#2793

#2795

#2796

#2797

#2798

#2799

#28

#2802

#2803

#2806

#2807

#2808

#2809

#2810

#2811

#2812

#2814

#2815

#2816

#2817

#2818

#282

#2824

#2825

#2826

#2827

#2829

#2831

#2833

#2834

#2837

#284

#2840

#2841

#2845

#2846

#2848

#2848

#2849

#285

#2850

#2852

#2855

#2856

#2858

#2859

#286

#2861

#2862

#2863

#2866

#2866

#287

#2870

#2874

#2874

#2878

#2882

#2883

#2884

#2885

#2886

#2891

#2892

#2893

#2898

#29

#2901

#2903

#2904

#2905

#2907

#2908

#2910

#2911

#2912

#2913

#2916

#2917

#2918

#2919

#292

#2921

#2922

#2924

#2928

#2929

#2931

#2932

#2933

#2935

#2936

#2937

#2938

#294

#2940

#2941

#2942

#2943

#2945

#2947

#2948

#2949

#2950

#2951

#2953

#2954

#2957

#2958

#2960

#2961

#2962

#2963

#2964

#2964

#2965

#2965

#2966

#2967

#2968

#297

#2970

#2971

#2972

#2975

#2976

#2976

#2977

#2979

#298

#2980

#2981

#2982

#2983

#2988

#299

#2990

#2991

#2992

#2993

#2995

#2996

#2998

#2999

#30

#3000

#3001

#3002

#3003

#3004

#3006

#3007

#3008

#3010

#3012

#3012

#3013

#3015

#3016

#3017

#3018

#302

#3021

#3022

#3024

#3027

#3028

#3029

#303

#3030

#3031

#3032

#3033

#3034

#3034

#3035

#3036

#3037

#3039

#304

#3040

#3041

#3042

#3044

#3045

#3046

#3047

#3048

#3049

#305

#3051

#3053

#3054

#3055

#3056

#3057

#3060

#3061

#3063

#3063

#3064

#3064

#3065

#3065

#3067

#3068

#3069

#3072

#3073

#3074

#3075

#3076

#3077

#3078

#3079

#3079

#308

#3080

#3081

#3083

#3084

#3085

#3086

#3089

#3091

#3092

#3093

#3094

#3095

#3096

#3098

#3099

#31

#310

#3100

#3101

#3103

#3106

#3107

#3108

#3109

#3111

#3111

#3112

#3113

#3116

#3117

#3118

#3120

#3121

#3122

#3124

#3125

#3126

#3127

#3128

#3129

#313

#3131

#3132

#3134

#3136

#3141

#3142

#3144

#3145

#3147

#3148

#3150

#3151

#3152

#3154

#3154

#3156

#3157

#3160

#3161

#3162

#3162

#3164

#3164

#3166

#3166

#3167

#317

#3170

#3171

#3172

#3174

#3176

#3179

#318

#3182

#3184

#3186

#3186

#3187

#3188

#3189

#3193

#3194

#3194

#3196

#3197

#3197

#32

#3200

#3201

#3201

#3202

#3204

#3206

#3207

#3210

#3210

#3211

#3217

#3218

#3220

#3221

#3222

#3223

#3224

#3229

#3230

#3231

#3231

#3232

#3234

#3235

#3236

#3237

#3238

#3239

#3240

#3242

#3244

#3245

#3246

#3249

#325

#3252

#3253

#3254

#3255

#3255

#3256

#3258

#3260

#3261

#3262

#3263

#3264

#3265

#3266

#3266

#3267

#3268

#3269

#327

#3270

#3271

#3273

#3274

#3276

#3276

#328

#3280

#3281

#3282

#3283

#3284

#3286

#3287

#3288

#3288

#329

#3291

#3291

#3292

#3292

#3297

#3298

#33

#3300

#3300

#3302

#3302

#3308

#3310

#3310

#3311

#3312

#3313

#3314

#3315

#3315

#3319

#3322

#3323

#3324

#3325

#3326

#3327

#334

#335

#336

#34

#340

#341

#343

#344

#348

#35

#351

#352

#353

#356

#357

#358

#359

#36

#360

#362

#363

#364

#367

#368

#37

#370

#373

#379

#384

#385

#388

#39

#393

#394

#395

#396

#4

#40

#400

#404

#406

#407

#41

#411

#412

#42

#434

#438

#44

#441

#443

#45

#453

#46

#462

#465

#47

#470

#472

#475

#477

#48

#480

#483

#485

#488

#49

#498

#5

#50

#501

#502

#51

#513

#514

#516

#519

#52

#520

#521

#522

#525

#529

#53

#534

#54

#543

#544

#545

#55

#550

#553

#557

#558

#56

#561

#562

#567

#57

#575

#578

#579

#58

#580

#581

#582

#583

#585

#586

#587

#588

#59

#590

#595

#596

#6

#60

#600

#605

#608

#609

#61

#611

#616

#617

#618

#619

#62

#621

#623

#624

#626

#63

#630

#633

#634

#635

#639

#64

#642

#643

#647

#648

#659

#66

#661

#664

#665

#666

#67

#670

#671

#678

#68

#684

#689

#698

#7

#70

#704

#708

#71

#712

#713

#715

#719

#72

#721

#723

#725

#727

#73

#733

#737

#738

#740

#741

#743

#745

#746

#748

#75

#750

#76

#761

#762

#767

#768

#770

#773

#783

#785

#789

#791

#793

#794

#795

#797

#798

#799

#8

#803

#805

#806

#809

#810

#812

#82

#820

#821

#822

#823

#829

#831

#836

#838

#84

#842

#848

#85

#851

#852

#853

#854

#858

#86

#860

#862

#867

#868

#87

#872

#88

#881

#884

#886

#889

#89

#892

#893

#898

#9

#90

#900

#901

#905

#906

#91

#910

#911

#918

#921

#93

#930

#932

#935

#94

#941

#947

#95

#950

#951

#953

#954

#957

#958

#96

#963

#964

#966

#968

#97

#971

#977

#981

#986

#989

#990

#993

#994

#999

v0.2.0

v0.2.1

v0.3.0

v0.3.1

v0.3.2

v0.4.0

v0.4.1

v0.4.2

v0.4.3

v0.5.0

v0.6.0

v0.7.0

v0.8.0

v0.8.1

v0.8.2

v0.9.0

v0.9.1

v0.9.2

v0.9.3

v0.9.4

v1.0.0

v1.0.1

v1.0.2

v1.0.3

v1.1.0

v1.1.1

v1.2.0

v1.3.0

v1.3.1

v1.3.2

v1.3.3

v1.3.4

v1.4.0

v1.4.1

v1.4.2

v1.4.3

v1.4.4

v1.4.5

v2.0.0

v2.0.1

v2.0.2

v2.0.3

v2.0.4

v2.1.0

v2.1.1

v2.2.0

v2.3.0

v2.3.1

v2.4.0

v2.4.1

v3.0.0

v3.0.1

v3.0.2

v3.1.0

v3.1.1

v3.2.0

v3.2.1

v3.2.2

v3.2.3

v3.3.0

v3.3.1

v3.3.2

v3.3.3

v3.3.4

v3.3.5

17a2c87f5c Refactor layers. (#1866) Nicolas Patry 2024-05-13 12:44:30 +0200
9e4b25c66b fix: setting the rotary base from the config for the grouped query models. Nilabhra 2024-05-14 10:14:18 +0400
5e8db7c14f add: support for falcon-10B architecture. Nilabhra 2024-04-15 13:52:20 +0400
011887f15c chore: removed unused import. Nilabhra 2024-05-14 11:00:45 +0400
56ed686942 Refactor layers. (#1866) Nicolas Patry 2024-05-13 12:44:30 +0200
c41573c67c fix: setting the rotary base from the config for the grouped query models. Nilabhra 2024-05-14 10:14:18 +0400
46ada47963 add: support for falcon-10B architecture. Nilabhra 2024-04-15 13:52:20 +0400
d3d83e7d04 Refactor layers. (#1866) Nicolas Patry 2024-05-13 12:44:30 +0200
dcd2b4425c fix: setting the rotary base from the config for the grouped query models. Nilabhra 2024-05-14 10:14:18 +0400
22c005fac3 add: support for falcon-10B architecture. Nilabhra 2024-04-15 13:52:20 +0400
80ba799c88 Granite support? (#1882) Nicolas Patry 2024-05-13 13:46:29 +0200
fffd569fa6 Refactor layers. (#1866) Nicolas Patry 2024-05-13 12:44:30 +0200
63fe93cfe9 update xpu docker image and use public ipex whel (#1860) Wang, Yi 2024-05-06 22:05:43 +0800
0c03bd8181 Upgrading to rust 1.78. (#1851) Nicolas Patry 2024-05-06 13:48:11 +0200
8fa2a8699b Add router name to /info endpoint (#1854) Lucain 2024-05-03 16:39:04 +0200
ee7c660412 Updating Phi3 (long context). (#1849) Nicolas Patry 2024-05-02 19:07:10 +0200
731303af53 feat: prefer huggingface_hub in docs and show image api (#1844) drbh 2024-05-02 10:56:24 -0400
dfafda53e7 Remove misleading warning (not that important nowadays anyway). (#1848) Nicolas Patry 2024-05-02 15:09:46 +0200
ea17ce798f Adding scripts to prepare load data. (#1841) Nicolas Patry 2024-05-01 21:48:06 +0200
33c6bb480d Fix: "Fixing" double BOS for mistral too. (#1843) Nicolas Patry 2024-05-01 18:21:17 +0200
0c3f5de379 fix: split docs and start conceptual page (#1836) drbh 2024-05-01 03:03:25 -0400
7ba395ab39 (chore): torch 2.3.0 (#1833) Nicolas Patry 2024-04-30 18:15:35 +0200
a9043412cd chore: update torch (#1730) OlivierDehaene 2024-04-30 14:04:28 +0200
10828cb8ba Handle images in chat api (#1828) drbh 2024-04-30 06:18:32 -0400
66ed33bce5 feat: add vlm docs and simple examples (#1812) drbh 2024-04-30 06:14:39 -0400
bce2c31f67 Fixing frequency penalty (#1811) Martin Iglesias Goyanes 2024-04-30 12:13:23 +0200
7f29f1c97a feat: add how it works section (#1773) drbh 2024-04-30 05:45:49 -0400
a37e0ad19e fix: use get_speculate to the number of layers (#1737) OlivierDehaene 2024-04-30 11:45:26 +0200
622aeda868 Add reference to TPU support (#1760) Brandon Royal 2024-04-30 05:39:52 -0400
38b1753f6c Small CI cleanup. (#1801) Nicolas Patry 2024-04-30 11:39:38 +0200
f1c704d2f2 Add the missing tool_prompt parameter to Python client (#1825) Maziyar Panahi 2024-04-30 11:07:17 +0200
3c126d2888 Prepare release. Nicolas Patry 2024-04-30 10:52:37 +0200
d4519fc413 Better graceful shutdown. (#1827) Nicolas Patry 2024-04-29 17:23:40 +0200
0c926eaf5e Changing the waiting_served_ratio default (stack more aggressively by default). (#1820) Nicolas Patry 2024-04-28 17:54:19 +0200
9848eb48b2 Dummy CI run. (#1817) Nicolas Patry 2024-04-26 19:19:55 +0200
2bb20a081f Fixing qwen2. (#1818) Nicolas Patry 2024-04-26 19:19:08 +0200
391658e546 Blunder (#1815) Nicolas Patry 2024-04-26 15:51:09 +0200
83cda096ed add intel xpu support for TGI (#1475) Wang, Yi 2024-04-26 21:48:58 +0800
959b026f45 Adding new env variables for TPU backends. (#1755) Nicolas Patry 2024-04-26 15:44:44 +0200
e38f89491f 2nd round of benchmark modifications (tiny adjustements to avoid overloading the host). (#1816) Nicolas Patry 2024-04-26 15:39:00 +0200
812e64b763 Use the generation config. (#1808) Nicolas Patry 2024-04-25 19:41:50 +0200
07b6014cd1 Update guidance docs to reflect grammar support in API (#1775) dr3s 2024-04-25 13:11:26 -0400
b76ba7979f Updating the benchmarks so everyone uses openai compat layer. (#1800) Nicolas Patry 2024-04-25 15:42:17 +0200
82aa5ebf0f feat: improve temperature logic in chat (#1749) drbh 2024-04-25 09:31:35 -0400
e6e5a0ae94 Adding support for HF_HUB_OFFLINE support in the router. (#1789) Nicolas Patry 2024-04-23 23:38:30 +0200
c4f1f2ba19 fix: avoid frequency and repetition penalty on padding tokens (#1765) drbh 2024-04-23 17:19:16 -0400
42769d97ca Idefics2. (#1756) Nicolas Patry 2024-04-23 23:04:44 +0200
d852d776d2 Phi3 support (#1797) Nicolas Patry 2024-04-23 18:40:05 +0200
19820b7ac2 feat: allow null eos and bos tokens in config (#1791) drbh 2024-04-23 10:26:54 -0400
9277208a5f Add attribute descriptions for GenerateParameters (#1798) Lucain 2024-04-23 16:22:12 +0200
ea62840147 fix typos in docs and add small clarifications (#1790) Moritz Laurer 2024-04-22 18:15:48 +0200
8a92aeb322 Make --cuda-graphs work as expected (bis) (#1768) fxmarty 2024-04-22 16:09:19 +0200
49e9537abe v2.0.1 OlivierDehaene 2024-04-18 17:20:36 +0200
fd13263e03 Upgrading all versions. (#1759) Nicolas Patry 2024-04-18 17:17:40 +0200
b53199fc23 feat: accept list as prompt and use first string (#1702) drbh 2024-04-17 04:41:12 -0400
280b758eca fix: bump clients test base url to llama (#1751) drbh 2024-04-16 16:56:47 -0400
b153bba455 Update response type for /v1/chat/completions and /v1/completions (#1747) Lucain 2024-04-16 19:26:32 +0200
a0a8f30a22 feat: improve tools to include name and add tests (#1693) drbh 2024-04-16 09:02:46 -0400
dadaed33f0 Fixing CI. (#1748) Nicolas Patry 2024-04-15 18:47:36 +0200
90885de12f v2.0.0 (#1736) OlivierDehaene 2024-04-12 18:38:34 +0200
0096deee7a Fix typo in guidance.md (#1735) Ikko Eltociear Ashimine 2024-04-12 23:51:07 +0900
4f28b4036e feat: medusa v2 (#1734) OlivierDehaene 2024-04-12 16:24:45 +0200
99ad49aef3 Improve the defaults for the launcher (#1727) Nicolas Patry 2024-04-12 14:20:31 +0200
c27c838f57 chore(cargo-toml): apply lto fat and codegen-units of one (#1651) Christof Weickhardt 2024-04-12 12:34:13 +0200
404f334600 fix(router): fix a possible deadlock in next_batch (#1731) OlivierDehaene 2024-04-12 10:59:04 +0200
155372a39e Upgrade EETQ (Fixes the cuda graphs). (#1729) Nicolas Patry 2024-04-12 08:15:28 +0200
72c52421d5 Fp8 Support (#1726) Nicolas Patry 2024-04-12 08:13:30 +0200
c1c81155e7 Dev/mask ldconfig output v2 (#1716) oOraph 2024-04-11 19:31:48 +0200
00dc371f47 Revert "Easier defaults for models stemmed from configs." Nicolas Patry 2024-04-11 12:51:57 +0000
bc7a9d609a Easier defaults for models stemmed from configs. Nicolas Patry 2024-04-11 12:48:39 +0000
4e855d84a3 Update libraries (#1713) abhishek thakur 2024-04-11 10:37:35 +0200
7706d4c0e8 hotfix: mixtral OlivierDehaene 2024-04-10 18:38:08 +0200
dbde165b16 fix: fix CohereForAI/c4ai-command-r-plus (#1707) OlivierDehaene 2024-04-10 17:20:25 +0200
fe3586a902 Adding Llava-Next (Llava 1.6) with full support. (#1709) Nicolas Patry 2024-04-09 21:32:00 +0200
acc995c6fa Automatic quantization config. (#1719) Nicolas Patry 2024-04-09 10:27:57 +0200
5e243eb222 Revert license to Apache 2.0 (#1714) OlivierDehaene 2024-04-08 15:06:16 +0200
435a662ed4 Regenerate ld.so.cache (#1708) oOraph 2024-04-08 08:52:10 +0200
8b817ca009 Force weights_only (before fully breaking pickle files anyway). (#1710) Nicolas Patry 2024-04-05 19:23:57 +0200
ba9cf1e51a Fixing cohere tokenizer. (#1697) Nicolas Patry 2024-04-05 16:44:19 +0200
577e0707f7 Push users to streaming in the readme. (#1698) Nicolas Patry 2024-04-05 16:44:10 +0200
66c9ab8373 Pickle conversion now requires --trust-remote-code. (#1704) Nicolas Patry 2024-04-05 13:32:53 +0200
321359dc61 Add cuda graphs sizes and make it default. (#1703) Nicolas Patry 2024-04-04 23:01:56 +0200
a2029a57ab v1.4.5 (#1686) OlivierDehaene 2024-03-29 19:17:24 +0100
14ed5a78d0 feat: Add dbrx support (#1685) OlivierDehaene 2024-03-29 18:49:36 +0100
6436f83a95 change ToolCall id to string Bao Phan 2024-05-14 13:18:28 +0700
a24bf62368 fix: setting the rotary base from the config for the grouped query models. Nilabhra 2024-05-14 10:14:18 +0400
b789b0b67c Support openai tool_call_id request Bao Phan 2024-05-14 09:45:56 +0700
6009dadee3 Model_type location. Nicolas Patry 2024-05-13 14:13:07 +0000
aceb87cc15 Remove old code again. Nicolas Patry 2024-05-13 13:26:52 +0000
027e1dabcd Backport changes in medusa. Nicolas Patry 2024-05-13 13:18:29 +0000
de11fc064a Remove traces of use_medusa. Nicolas Patry 2024-05-13 13:12:44 +0000
3397b26341 Missing update after rebase Nicolas Patry 2024-05-13 13:09:22 +0000
71a535e401 Rebase after refactor. Nicolas Patry 2024-05-13 12:44:06 +0000
b884899086 Removed a bunch of hardcodes. Nicolas Patry 2024-05-08 12:20:00 +0000
1a8a18d541 Cleanup. Nicolas Patry 2024-05-08 06:33:13 +0000
1fde6850bb Fixed speculator. Nicolas Patry 2024-05-08 06:31:40 +0000
9291d42865 [REWRITTEN] added a bunch of cleanup based on comments in PR; removed conditionals from LayerNormParameterized and renamed to MLPSpeculatorLayerNorm; now using modules for tensor-parallel (this is work in progress - looking into if this is right approach); fixed issue with getting medusa model; fixed for more efficient loading Joshua Rosenkranz 2024-05-03 10:02:11 -0400
38d6045443 Hardcode a few stuff to make it work. Nicolas Patry 2024-05-06 14:03:05 +0000
453e91f755 added a bunch of cleanup based on comments in PR; removed conditionals from LayerNormParameterized and renamed to MLPSpeculatorLayerNorm; now using modules for tensor-parallel (this is work in progress - looking into if this is right approach); fixed issue with getting medusa model; fixed for more efficient loading Joshua Rosenkranz 2024-05-03 10:02:11 -0400
6e5c19ec44 initial commit of mlp_speculator support (draft) Joshua Rosenkranz 2024-05-02 10:18:42 -0400