text-generation-inference

huggingface/text-generation-inference

Fork 0

mirror of https://github.com/huggingface/text-generation-inference.git synced 2025-09-11 12:24:53 +00:00

Commit Graph

Select branches

Hide Pull Requests

20250708-ci-fixes

add-chat-response-format

add-google-cloud-provider

add-quickstart-script

add-rotary-embed-tests

add-small-ttft-script

add-test-for-warmup-and-kvcache

add_L4

add_api_key

add_batch_dimension

add_chunked_atn

add_chunked_attn

add_deepseekv3

add_gptq_docs

add_integration_test

add_readme_dashboard

add_tunable_prefill

add_vlm_chunking

adding_docs

adjust-mllama-test-output

adjust-where-request-max-tokens-is-defaulted

aiter_kernels

amd-ci-fx

auto_length

automodel-supports-flash-paged-attention

avoid-cuda-graph-during-warmup-if-oom

avoid-zero-seed

backends/trtllm

backends/trtllm-executor

baichuan2-13b

bnb4

bugfix/add_tools_prompt

bugfix/moe-kernels-imports

bugfix/phi-exl2

bump-client-0.6.2

bump-kernel-versions

bump-poetry-and-requirements

chunked_attn_l4

ci-amihalik-update-chat-completion-messages

ci-new-cluster

ci-patch

ci-run-openai-function-calling-compatible-support

ci-update_xpu_image

ci-xpu

ci-xpu2

ci2

ci_amd

ci_amd2

ci_amd3

ci_amd4

close_dl_thread

compat_logger

cuda_ipc_allreduce

debug-gpt2

debug-request-id

debug-torch-23

debug/gemma2

debugging-timeouts

deploy/aml

dev

development-guide

dummy

enable-non-grammar-constrained-tools

enable-qwen2vl-video

enable-transformers-vlm

enable_non_divisible_embeddings

exl2

experiment/moe

explore-static-triton-kernels

explore-t4-gemma-issues

feat-backend-llamacpp

feat/add-load-test

feat/attention_sinks

feat/backend_abstraction

feat/backend_feature

feat/better_tokens

feat/cuda_12

feat/flash_decoding

feat/improve_max_tokens

feat/max_queue_size

feat/page_re_alloc

feat/parse_logs

feat/support_deepspeed

feature/machete

feature/moe-kernels

feature/no_repeat_ngram_size

feature/no_repeat_ngram_size_ci

feature/phi-3-small

feature/prefix

feature/radix-prefix-cache

feature/radix-prefix-cache-bench

feature/vlm-prefix-caching

fix-cudagraph-bug

fix-gemma-tokenization

fix-grammar-cleanup-bug

fix-grammar-fsm-batching

fix-mixtral-adapter-loading

fix-release-tests

fix-repack-for-marlin

fix-tool-call-def

fix-tp

fix-version-install

fix/allow-top-p-0

fix/avoid_record_streams

fix/op-trace-id

fix/parse-mamba-config

fix_default_arg

fix_exl2

fix_fp8_llama3.2

fix_leak

fix_mistral2

fix_neox_rotary_emb

fix_phi3

flashinfer

flashinfer-0.2.5

fp8_kvcache

fp8_rocm

gaudi_llama4_tmp

gha_sccache_use_secrets

git_2.0.4

git_3.1.1

git_3.2.0

git_3.2.1

git_3.3.0

git_3.3.1

git_3.3.2

git_v2.1.0

git_v2.1.1

git_v2.2.0

git_v2.3.0

git_v2.3.1

git_v2.4.0

git_v2.4.1

git_v3.0.0

git_v3.0.1

git_v3.0.2

git_v3.1.0

git_v3.2.2

git_v3.2.3

git_v3.3.3

git_v3.3.4

git_v3.3.5

improve-docs

improve-dynamic-message-content

improve-json-schema-field

improve-tool-call-and-response-ids

improve_defaults

improve_launcher_defaults

inlcude-latest-release-on-commit-builds-tags

ipex-moe

kvrouter

kvrouter-endpoints

llama-fused-compiled-mlp

main

maintenance/docker-network

maintenance/merge-vlm-input-prep

mamba2

martinigoyanes-fix-frequency-penalty

medusa

megatron

message-more-info

mi300-temp

mllama

model_compat_log

more_logs

multi-lora

new_minor_version

nix/cargo-clippy

nix/docker2

nix/pytorch-2.5.1

nix_integration_tests

nix_test2

no_root_user

no_root_user2

op-compilation-benchmarking

origin/slind_window_fix

osanseviero-patch-1

patch_version_3.3.6

pip-installable

pr-1869-ci-run

pr-2076-ci-run

pr-2290-ci-runner

pr-2366-ci-branch

pr-2444-ci-branch

pr-2517-ci-branch

pr-2711-ci-branch

pr-2784-ci-branch

pr-2840-ci-branch

pr-2954-ci-branch

pr-3002-ci-branch

pr-3004-ci-branch

pr-3018-ci-branch

precompile-kernels-workflow

prefix_chunk

prefix_default

proxy_sse_engine_state

quantization

quantization-0.1

refactor-lora-linear

release-3.2.4

remove_post_load_weights

response-header-metrics

revert

rocm-ci-build

rocm_6.2_fixes

router-grammar-compile

s3-cache

self-generating-docs

set-num-blocks

simpler_exllama

skip-mistral-test

speculative

streaming_conceptual

support-granite-vision

support-logit-bias-in-chat

support-phi-model

support-phi3-small

support-pre-compile-kernels

temp_work

test-batch-speedup-amount

test_docs

test_rocm

tmp_invariants

tmp_medusa

tmp_torch_compile

transformers-ci

triton_fix

trtllm-stop-words

trtllm/executor_stats

tuna

update-jsonschema

update_docs2

update_internal_version

update_peft

update_readme

upgrade-outlines

upgrade_mlp_speculator

use_g6

use_updated_kernels

vllm/setup

zstd

#1

#100

#101

#1010

#1018

#1019

#102

#1022

#1023

#1024

#103

#1033

#1034

#1042

#1044

#1045

#1048

#1049

#1052

#1054

#1058

#1059

#106

#1060

#1061

#1063

#1064

#1065

#1066

#1068

#107

#1070

#1071

#1075

#1076

#1077

#108

#1080

#1081

#1089

#109

#1090

#1091

#1092

#1094

#1096

#1097

#1099

#11

#110

#1100

#1101

#1102

#1103

#1105

#1110

#1112

#1116

#1123

#1128

#1134

#114

#1140

#1141

#115

#1153

#1155

#116

#1165

#1165

#117

#1173

#1176

#1178

#1179

#118

#1182

#1183

#1184

#1187

#119

#1198

#1202

#1211

#1214

#1219

#122

#1224

#1228

#123

#1239

#1241

#1242

#1243

#1246

#1252

#126

#1260

#1267

#1270

#1272

#1274

#1276

#1279

#128

#1285

#1287

#129

#1294

#1295

#13

#130

#1301

#1305

#1307

#1308

#1313

#132

#1326

#1328

#133

#1336

#1337

#134

#1341

#1343

#1346

#1347

#1348

#135

#1351

#1352

#1353

#1358

#136

#1361

#1364

#137

#1370

#1373

#138

#1381

#1386

#139

#1390

#1395

#14

#140

#1408

#141

#1414

#1419

#142

#1420

#1424

#1425

#1427

#1428

#143

#1436

#144

#1442

#1448

#145

#1450

#1453

#1454

#1455

#1459

#1461

#1462

#1463

#1469

#147

#1470

#1471

#1473

#1475

#1476

#1477

#1478

#148

#1480

#1484

#1486

#1488

#1489

#149

#1490

#1491

#1492

#1494

#1495

#1496

#1497

#1498

#15

#150

#1502

#1504

#1505

#1506

#151

#1511

#1512

#1514

#1515

#1516

#1517

#1518

#152

#1520

#1523

#1524

#1526

#1527

#153

#1532

#1533

#1537

#1539

#154

#1540

#1541

#1542

#1543

#155

#1550

#1552

#1555

#1556

#1557

#1560

#1563

#1564

#1567

#1568

#1569

#1570

#1571

#1576

#1577

#1578

#1579

#1580

#1583

#1584

#1585

#1586

#1587

#1588

#159

#1591

#1592

#1594

#16

#160

#1603

#1605

#1606

#1607

#1608

#1609

#161

#1610

#1614

#1617

#1618

#1619

#162

#1621

#1626

#1628

#163

#1632

#1637

#1638

#1639

#164

#1646

#1648

#1650

#1651

#1653

#1658

#1660

#1662

#1663

#1664

#1666

#1667

#1668

#167

#1676

#168

#1682

#1685

#1686

#1693

#1697

#1698

#17

#170

#1702

#1703

#1704

#1707

#1708

#1709

#1710

#1713

#1714

#1715

#1716

#1718

#1719

#1726

#1727

#1729

#173

#1730

#1731

#1734

#1735

#1736

#1737

#1739

#174

#1740

#1747

#1748

#1749

#175

#1751

#1755

#1756

#1758

#1759

#1760

#1764

#1765

#1767

#1768

#1773

#1774

#1775

#178

#1784

#1789

#179

#1790

#1791

#1797

#1798

#18

#180

#1800

#1801

#1808

#181

#1811

#1812

#1813

#1815

#1816

#1817

#1818

#1820

#1825

#1827

#1828

#1829

#183

#1830

#1832

#1833

#1835

#1836

#1839

#184

#1840

#1841

#1843

#1844

#1845

#1848

#1849

#185

#1850

#1851

#1854

#1855

#186

#1860

#1865

#1866

#1869

#187

#1882

#1884

#1886

#1888

#1889

#1890

#1892

#1894

#1895

#1898

#19

#190

#1902

#1906

#1908

#1909

#191

#1910

#1912

#1915

#1916

#1917

#1918

#1919

#1920

#1921

#1923

#1924

#1925

#1929

#193

#1931

#1932

#1934

#1935

#1936

#1937

#1938

#1939

#194

#1940

#1942

#1947

#1948

#1949

#1950

#1951

#1953

#1954

#1958

#1959

#196

#1963

#1965

#1967

#1970

#1971

#1975

#1980

#1981

#1985

#1986

#1988

#1989

#1990

#1994

#1995

#1996

#2

#20

#200

#2002

#2003

#2004

#2005

#2006

#2008

#201

#2010

#2011

#2013

#2014

#2015

#2016

#2017

#202

#2020

#2021

#2022

#2023

#2024

#2028

#203

#2031

#2032

#2033

#2034

#2038

#2039

#2044

#2045

#2046

#2047

#2049

#205

#2050

#2052

#2054

#2059

#2060

#2061

#2062

#2063

#2065

#2066

#2068

#207

#2071

#2072

#2074

#2075

#2076

#2078

#2079

#208

#2080

#2084

#2085

#2086

#2087

#2088

#2089

#2091

#2092

#2094

#2095

#2097

#2099

#210

#2101

#2102

#2103

#2104

#2105

#2109

#2110

#2111

#2114

#2115

#2116

#2117

#2118

#2119

#212

#2120

#2123

#2124

#2125

#2127

#2128

#2129

#213

#2131

#2132

#2133

#2134

#2135

#2137

#2138

#214

#2140

#2141

#2142

#2148

#2149

#215

#2151

#2152

#2153

#2155

#2156

#2158

#216

#2161

#2163

#2164

#2165

#2166

#2167

#2168

#2169

#217

#2170

#2173

#2175

#2176

#2178

#2179

#218

#2180

#2181

#2182

#2186

#2187

#2189

#219

#2190

#2191

#2193

#2194

#2196

#22

#220

#2201

#2202

#2203

#2204

#2205

#2208

#2209

#221

#2212

#2213

#2215

#2216

#2217

#222

#2220

#2221

#2224

#2225

#2226

#2228

#2230

#2231

#2232

#2233

#2237

#2242

#2243

#2244

#2245

#2248

#2249

#2250

#2251

#2254

#2255

#2256

#2257

#2258

#2259

#226

#2260

#2261

#2262

#2266

#2268

#2269

#227

#2271

#2272

#2273

#2274

#2276

#2277

#2278

#2279

#228

#2281

#2282

#2283

#2284

#2285

#2286

#2287

#2288

#2289

#2290

#2291

#2292

#2298

#2299

#23

#2300

#2303

#2304

#2306

#2307

#2308

#2309

#2311

#2311

#2313

#2314

#2315

#2317

#2320

#2323

#2325

#2326

#2327

#2328

#2329

#233

#2330

#2331

#2333

#2335

#2336

#2337

#2338

#2339

#234

#2341

#2342

#2343

#2344

#2345

#2346

#2347

#235

#2350

#2351

#2352

#2353

#2354

#2357

#2358

#2359

#2360

#2361

#2364

#2365

#2366

#2367

#2368

#237

#2370

#2371

#2372

#2374

#2377

#2378

#2379

#2381

#2382

#2384

#2385

#2386

#2387

#2389

#2390

#2391

#2392

#2394

#2395

#2396

#2397

#2398

#2399

#24

#2401

#2402

#2403

#2404

#2405

#2406

#2407

#2408

#2410

#2411

#2412

#2414

#2415

#2416

#2419

#242

#2420

#2422

#2423

#2424

#2426

#2427

#2428

#2429

#2430

#2431

#2433

#2437

#2438

#2439

#244

#2442

#2443

#2444

#2449

#2450

#2451

#2453

#2454

#2455

#2459

#246

#2462

#2463

#2468

#2469

#2470

#2471

#2472

#2473

#2477

#2478

#2479

#248

#2481

#2482

#2484

#2486

#2489

#2490

#2491

#2492

#2493

#2494

#2496

#2497

#2498

#2499

#25

#250

#2500

#2501

#2507

#251

#2510

#2511

#2512

#2513

#2514

#2515

#2516

#2517

#2518

#2519

#252

#2520

#2521

#2524

#2525

#2527

#2528

#2529

#2532

#2533

#2535

#2536

#2537

#2538

#2539

#2540

#2545

#2546

#2547

#2548

#255

#2550

#2551

#2552

#2553

#2554

#2555

#2556

#2557

#2558

#2561

#2562

#2563

#2566

#2567

#2568

#257

#2574

#2575

#2577

#2578

#2579

#258

#2580

#2582

#2585

#2586

#2587

#2588

#2589

#259

#2590

#2591

#2592

#2594

#2595

#2596

#2597

#2599

#26

#2600

#2601

#2602

#2603

#2604

#2605

#2606

#2607

#2609

#261

#2610

#2611

#2612

#2614

#2616

#2617

#2619

#262

#2620

#2622

#2623

#2625

#2627

#2628

#2629

#2630

#2631

#2632

#2633

#2634

#2637

#264

#2640

#2642

#2642

#2645

#2646

#2647

#2648

#2650

#2651

#2652

#2655

#2658

#2659

#266

#2661

#2663

#2664

#2665

#2666

#2668

#267

#2673

#2674

#2677

#2678

#2680

#2682

#2683

#2684

#2685

#2686

#2687

#2688

#2689

#269

#2690

#2691

#2692

#2693

#2694

#2695

#2697

#2698

#2699

#27

#2701

#2702

#2704

#2706

#2707

#2708

#2709

#2710

#2711

#2712

#2713

#2714

#2716

#2717

#2718

#2719

#272

#2720

#2721

#2723

#2724

#2725

#2726

#2732

#2733

#2738

#274

#2740

#2741

#2742

#2743

#2745

#2746

#275

#2750

#2751

#2753

#2754

#2755

#2756

#2756

#2758

#276

#2760

#2761

#2762

#2764

#2765

#2766

#2767

#2768

#2769

#277

#2770

#2771

#2772

#2773

#2774

#2777

#2778

#2779

#278

#2782

#2784

#2785

#2786

#2788

#2789

#2790

#2791

#2792

#2793

#2795

#2796

#2797

#2798

#2799

#28

#2802

#2803

#2806

#2807

#2808

#2809

#2810

#2811

#2812

#2814

#2815

#2816

#2817

#2818

#282

#2824

#2825

#2826

#2827

#2829

#2831

#2833

#2834

#2837

#284

#2840

#2841

#2845

#2846

#2848

#2848

#2849

#285

#2850

#2852

#2855

#2856

#2858

#2859

#286

#2861

#2862

#2863

#2866

#2866

#287

#2870

#2874

#2874

#2878

#2882

#2883

#2884

#2885

#2886

#2891

#2892

#2893

#2898

#29

#2901

#2903

#2904

#2905

#2907

#2908

#2910

#2911

#2912

#2913

#2916

#2917

#2918

#2919

#292

#2921

#2922

#2924

#2928

#2929

#2931

#2932

#2933

#2935

#2936

#2937

#2938

#294

#2940

#2941

#2942

#2943

#2945

#2947

#2948

#2949

#2950

#2951

#2953

#2954

#2957

#2958

#2960

#2961

#2962

#2963

#2964

#2964

#2965

#2965

#2966

#2967

#2968

#297

#2970

#2971

#2972

#2975

#2976

#2976

#2977

#2979

#298

#2980

#2981

#2982

#2983

#2988

#299

#2990

#2991

#2992

#2993

#2995

#2996

#2998

#2999

#30

#3000

#3001

#3002

#3003

#3004

#3006

#3007

#3008

#3010

#3012

#3012

#3013

#3015

#3016

#3017

#3018

#302

#3021

#3022

#3024

#3027

#3028

#3029

#303

#3030

#3031

#3032

#3033

#3034

#3034

#3035

#3036

#3037

#3039

#304

#3040

#3041

#3042

#3044

#3045

#3046

#3047

#3048

#3049

#305

#3051

#3053

#3054

#3055

#3056

#3057

#3060

#3061

#3063

#3063

#3064

#3064

#3065

#3065

#3067

#3068

#3069

#3072

#3073

#3074

#3075

#3076

#3077

#3078

#3079

#3079

#308

#3080

#3081

#3083

#3084

#3085

#3086

#3089

#3091

#3092

#3093

#3094

#3095

#3096

#3098

#3099

#31

#310

#3100

#3101

#3103

#3106

#3107

#3108

#3109

#3111

#3111

#3112

#3113

#3116

#3117

#3118

#3120

#3121

#3122

#3124

#3125

#3126

#3127

#3128

#3129

#313

#3131

#3132

#3134

#3136

#3141

#3142

#3144

#3145

#3147

#3148

#3150

#3151

#3152

#3154

#3154

#3156

#3157

#3160

#3161

#3162

#3162

#3164

#3164

#3166

#3166

#3167

#317

#3170

#3171

#3172

#3174

#3176

#3179

#318

#3182

#3184

#3186

#3186

#3187

#3188

#3189

#3193

#3194

#3194

#3196

#3197

#3197

#32

#3200

#3201

#3201

#3202

#3204

#3206

#3207

#3210

#3210

#3211

#3217

#3218

#3220

#3221

#3222

#3223

#3224

#3229

#3230

#3231

#3231

#3232

#3234

#3235

#3236

#3237

#3238

#3239

#3240

#3242

#3244

#3245

#3246

#3249

#325

#3252

#3253

#3254

#3255

#3255

#3256

#3258

#3260

#3261

#3262

#3263

#3264

#3265

#3266

#3266

#3267

#3268

#3269

#327

#3270

#3271

#3273

#3274

#3276

#3276

#328

#3280

#3281

#3282

#3283

#3284

#3286

#3287

#3288

#3288

#329

#3291

#3291

#3292

#3292

#3297

#3298

#33

#3300

#3300

#3302

#3302

#3308

#3310

#3310

#3311

#3312

#3313

#3314

#3315

#3315

#3319

#3322

#3323

#3324

#3325

#3326

#3327

#334

#335

#336

#34

#340

#341

#343

#344

#348

#35

#351

#352

#353

#356

#357

#358

#359

#36

#360

#362

#363

#364

#367

#368

#37

#370

#373

#379

#384

#385

#388

#39

#393

#394

#395

#396

#4

#40

#400

#404

#406

#407

#41

#411

#412

#42

#434

#438

#44

#441

#443

#45

#453

#46

#462

#465

#47

#470

#472

#475

#477

#48

#480

#483

#485

#488

#49

#498

#5

#50

#501

#502

#51

#513

#514

#516

#519

#52

#520

#521

#522

#525

#529

#53

#534

#54

#543

#544

#545

#55

#550

#553

#557

#558

#56

#561

#562

#567

#57

#575

#578

#579

#58

#580

#581

#582

#583

#585

#586

#587

#588

#59

#590

#595

#596

#6

#60

#600

#605

#608

#609

#61

#611

#616

#617

#618

#619

#62

#621

#623

#624

#626

#63

#630

#633

#634

#635

#639

#64

#642

#643

#647

#648

#659

#66

#661

#664

#665

#666

#67

#670

#671

#678

#68

#684

#689

#698

#7

#70

#704

#708

#71

#712

#713

#715

#719

#72

#721

#723

#725

#727

#73

#733

#737

#738

#740

#741

#743

#745

#746

#748

#75

#750

#76

#761

#762

#767

#768

#770

#773

#783

#785

#789

#791

#793

#794

#795

#797

#798

#799

#8

#803

#805

#806

#809

#810

#812

#82

#820

#821

#822

#823

#829

#831

#836

#838

#84

#842

#848

#85

#851

#852

#853

#854

#858

#86

#860

#862

#867

#868

#87

#872

#88

#881

#884

#886

#889

#89

#892

#893

#898

#9

#90

#900

#901

#905

#906

#91

#910

#911

#918

#921

#93

#930

#932

#935

#94

#941

#947

#95

#950

#951

#953

#954

#957

#958

#96

#963

#964

#966

#968

#97

#971

#977

#981

#986

#989

#990

#993

#994

#999

v0.2.0

v0.2.1

v0.3.0

v0.3.1

v0.3.2

v0.4.0

v0.4.1

v0.4.2

v0.4.3

v0.5.0

v0.6.0

v0.7.0

v0.8.0

v0.8.1

v0.8.2

v0.9.0

v0.9.1

v0.9.2

v0.9.3

v0.9.4

v1.0.0

v1.0.1

v1.0.2

v1.0.3

v1.1.0

v1.1.1

v1.2.0

v1.3.0

v1.3.1

v1.3.2

v1.3.3

v1.3.4

v1.4.0

v1.4.1

v1.4.2

v1.4.3

v1.4.4

v1.4.5

v2.0.0

v2.0.1

v2.0.2

v2.0.3

v2.0.4

v2.1.0

v2.1.1

v2.2.0

v2.3.0

v2.3.1

v2.4.0

v2.4.1

v3.0.0

v3.0.1

v3.0.2

v3.1.0

v3.1.1

v3.2.0

v3.2.1

v3.2.2

v3.2.3

v3.3.0

v3.3.1

v3.3.2

v3.3.3

v3.3.4

v3.3.5

8770b39c20 fix: remove accidentally included guideline from rebase drbh 2024-11-22 13:50:30 -0500
4069955e44 fix: bump test output drbh 2024-11-21 12:04:24 -0500
7486d930f8 fix: remove unneeded launcher args in continue test drbh 2024-11-21 09:09:39 -0500
70066e6d8c fix: remove continue_final_message chat request param David Holtz 2024-11-19 21:24:18 +0000
d6280141de fix: bump openapi docs David Holtz 2024-11-08 20:46:58 +0000
b2ae92e470 feat: add test for continue final message David Holtz 2024-11-08 19:00:05 +0000
c782a78623 feat: support continue_final_message param in chat request drbh 2024-11-07 18:24:58 -0400
2d1e80d248 fix: only use eos_token_id as pad_token_id if int Dmitry Rogozhkin 2024-11-22 10:36:26 -0800
d2ed52f531

v2.4.1 v2.4.1 git_v2.4.1 OlivierDehaene 2024-11-22 18:28:39 +0100
780531ec77

chore: prepare 2.4.1 release (#2773) OlivierDehaene 2024-11-22 18:26:15 +0100
7213d30141

fmt OlivierDehaene 2024-11-22 17:39:20 +0100
690702b1ce

fix tests OlivierDehaene 2024-11-22 16:09:14 +0100
bb87333d19

chore: prepare 2.4.1 release OlivierDehaene 2024-11-22 15:50:44 +0100
e87893d38e

chore: Update to marlin-kernels 0.3.6 (#2771) Daniël de Kok 2024-11-22 15:44:47 +0100
9025a26cea chore: remove unrelated change to trtllm Morgan Funtowicz 2024-11-22 15:42:09 +0100
862a519fdd misc(doc): rust documentation Morgan Funtowicz 2024-11-22 15:35:55 +0100
b9c04b9c07 misc(doc): c++ documentation Morgan Funtowicz 2024-11-22 15:13:54 +0100
4ee2ee58c9 misc(license): update LICENSE Morgan Funtowicz 2024-11-22 14:48:39 +0100
afb381033b Update to marlin-kernels 0.3.6 Daniël de Kok 2024-11-22 09:28:34 +0000
2d9465d181 misc(backend): allow rebinding numa core affinity Morgan Funtowicz 2024-11-22 14:02:58 +0100
30ae99631c misc(docker): add numa lib as dependency Morgan Funtowicz 2024-11-22 13:34:52 +0100
5a85661661 feat(backend): rely on multi consumer queue to scheduler workers Morgan Funtowicz 2024-11-22 13:32:56 +0100
b6e3ffb037

Merge branch 'main' into feature/get-trace-id-from-req-headers Hyeongchan Kim 2024-11-22 13:25:25 +0900
84eead219a feat(backend): correctly setup llama_context providing n_threads and n_ubatch Morgan Funtowicz 2024-11-21 21:43:50 +0100
ab7ccf5bc3

feat: add payload limit (#2726) OlivierDehaene 2024-11-21 19:20:15 +0100
e830508c20

update launcher OlivierDehaene 2024-11-21 19:13:35 +0100
d5bc6a20bd

feat: Add automatic nightly benchmarks (#2591) Hugo Larcher 2024-11-21 18:11:42 +0100
d012f229c6

Remove guideline from API (#2762) Lucain 2024-11-21 17:56:38 +0100
c5b5b3a11c

docs: Add a README section about using Nix (#2767) Daniël de Kok 2024-11-21 17:53:27 +0100
faa10ad0bc

fix: tweak grammar test response (#2769) drbh 2024-11-21 11:46:00 -0500
8e0c161d0a

fix: incomplete generations w/ single tokens generations and models that did not support chunking (#2770) OlivierDehaene 2024-11-21 17:37:55 +0100
489675b5e5

entries was wrongly extended for model that did not support chunking OlivierDehaene 2024-11-21 15:24:04 +0100
322565d8f2 fix: tweak grammar test response drbh 2024-11-21 09:13:27 -0500
4cbba33139

Incomplete generation stream fix (#2754) Wang, Yi 2024-11-21 22:06:26 +0800
50c376612c feat(backend): bind thread and memory affinity for thread Morgan Funtowicz 2024-11-21 13:52:38 +0100
3c54488638

nix: downgrade to outlines 0.1.3 (#2768) Daniël de Kok 2024-11-21 13:00:26 +0100
2a68d6db09 nix: downgrade to outlines 0.1.3 Daniël de Kok 2024-11-21 11:21:23 +0000
56e3b65c46 Add a README section about using Nix Daniël de Kok 2024-11-21 08:53:16 +0000
6ee8d6dd3b

fix: set outlines version to 0.1.3 to avoid caching serialization issue (#2766) drbh 2024-11-20 18:09:39 -0500
5335bf973b feat(backend): multistream inference on CPU Morgan Funtowicz 2024-11-21 00:03:05 +0100
613fa03b63 fix: set outlines version to 0.1.3 to avoid bug drbh 2024-11-20 16:57:08 -0500
07bed530f7

nix: build and cache impure devshells (#2765) Daniël de Kok 2024-11-20 20:56:11 +0100
aa46309f8d Fix Nix build, disable pure shell (covered by Nix tests) Daniël de Kok 2024-11-20 19:30:30 +0000
45c6ae6dd3 nix: add poetry to the impure shell Daniël de Kok 2024-11-20 18:59:17 +0000
98db89b8b6 nix: build and cache all devshells Daniël de Kok 2024-11-20 18:43:31 +0000
46a5a7e73e

Add support for wNa16 int 2:4 compressed-tensors checkpoints (#2758) Daniël de Kok 2024-11-20 18:25:23 +0100
2fda8845a7

nix: update for outlines 0.1.4 (#2764) Daniël de Kok 2024-11-20 18:24:29 +0100
80cfe1b16c nix: update for outlines 0.1.4 Daniël de Kok 2024-11-20 16:17:12 +0000
74a8a820ad Use FP8 KV cache when specified by compressed-tensors Daniël de Kok 2024-11-20 12:31:47 +0000
45013b60a4 Install compressed-tensors in Docker CPU builds Daniël de Kok 2024-11-20 14:17:47 +0000
87004ae711

Remove guideline from API Wauplin 2024-11-20 13:47:59 +0100
5f52e2e38e entries.len() could > batch.size in prefill, so need to filter as well. Wang, Yi A 2024-11-19 23:27:45 -0800
bd6e8b3c13

fix: adjust llama MLP name from dense to mlp to correctly apply lora (#2760) drbh 2024-11-19 15:10:22 -0500
91fe29c1b1 fix: adjust llama MLP name from dense to mlp to correctly apply lora drbh 2024-11-19 14:51:46 -0500
5489406c4a

PR 2634 CI - Fix the tool_choice format for named choice by adapting OpenAIs scheme (#2645) drbh 2024-11-19 13:31:59 -0500
070af963f8 Add support for wNa16 int 2:4 compressed-tensors checkpoints Daniël de Kok 2024-11-19 13:49:11 +0000
2007a9473a

Update to moe-kernels 0.7.0 (#2720) Daniël de Kok 2024-11-19 14:55:29 +0100
2b9d692831 Update to moe-kernels 0.7.0 Daniël de Kok 2024-11-04 15:04:04 +0000
b4ec427ad0

Simplify two ipex conditions (#2755) Daniël de Kok 2024-11-19 08:04:23 +0100
d49ce00f40

With this change, bucketing/padding of input is applied to health check. (#245) srajabos 2024-11-18 16:38:30 -0500
38cff84a3e

feat: support flash attention 2 in qwen2 vl vision blocks (#2721) drbh 2024-11-18 12:46:40 -0500
3c9df21ff8

Add support for compressed-tensors w8a8 int checkpoints (#2745) Daniël de Kok 2024-11-18 17:20:31 +0100
c6393c5512 Simplify two ipex conditions Daniël de Kok 2024-11-18 16:18:59 +0000
a5ecd6e586

add ipex moe implementation to support Mixtral and PhiMoe (#2707) Wang, Yi 2024-11-19 00:16:55 +0800
70409f09f4 fix: calc max_seqlen once and small refactors David Holtz 2024-11-18 15:34:08 +0000
fea62e928f

fix: improve find_segments via numpy diff (#2686) drbh 2024-11-18 09:51:06 -0500
05f98efc9d Small fixes Daniël de Kok 2024-11-18 14:49:59 +0000
3eb6c1ccf8

Fix a typo Daniël de Kok 2024-11-18 15:45:52 +0100
e0018723fc Use marlin-kernels 0.3.5 Daniël de Kok 2024-11-18 12:43:12 +0000
53b6f6e604

Apply suggestions from code review ipex-moe Wang, Yi 2024-11-18 19:28:07 +0800
f76c0ff17f Always use dynamic input quantization for w8a8 int Daniël de Kok 2024-11-18 10:54:51 +0000
b2dc10aea5 Add support for compressed-tensors w8a8 int checkpoints Daniël de Kok 2024-11-14 11:00:29 +0000
e0e39fa0d9

Merge branch 'main' into moe Wang, Yi 2024-11-18 09:45:05 +0800
52e48739a5

Remove vLLM dependency for CUDA (#2751) Daniël de Kok 2024-11-17 17:34:50 +0100
6489f85269

feat: return streaming errors as an event formatted for openai's client (#2668) drbh 2024-11-15 08:49:19 -0500
d8f1203bcb

Small lifting. Nicolas Patry 2024-11-15 14:48:23 +0100
110d154777 Fix clippy warning Daniël de Kok 2024-11-15 13:44:26 +0000
5d9613e0c5

Revert "Reworked the implementation." Nicolas Patry 2024-11-15 14:27:16 +0100
df72deac26

Reworked the implementation. Nicolas Patry 2024-11-15 20:24:47 +0700
22d205aa47

Revert "fix: improve streamin error to include error_type" drbh 2024-10-25 11:55:44 -0400
a9c8c6a0d7

fix: improve streamin error to include error_type David Holtz 2024-10-25 14:35:25 +0000
21378b325b

fix: improve stream api error format and add status code drbh 2024-10-22 11:59:14 -0400
0ae84e5473

fix: propagate completions error events to stream drbh 2024-10-22 09:53:15 -0400
84cd8434b0

feat: return streaming errors as an event formatted for openai's client drbh 2024-10-18 14:15:27 -0400
dfc00f7fb3 Remove vLLM dependency for CUDA Daniël de Kok 2024-11-15 12:31:30 +0000
34a3bdedc3

Upgrading our deps. (#2750) Nicolas Patry 2024-11-15 21:03:27 +0800
b52d6332e4

Fixup. Nicolas Patry 2024-11-15 13:45:22 +0100
8dffe1ca08

fixup. Nicolas Patry 2024-11-15 13:33:47 +0100
1623a56544

Upgrading our deps. Nicolas Patry 2024-11-15 13:26:06 +0100
4580ced091

Upgrade outlines to 0.1.1 (#2742) Alex Weston 2024-11-15 07:22:52 -0500
003eaec0fb

fix response type of document for Text Generation Inference (#2743) jito 2024-11-15 21:21:50 +0900
4f4857a4ac

Fix: Change embeddings to embedding (#2738) Billel Mokeddem 2024-11-15 16:16:15 +0400
f9ee46f740

Fix: Change model_type from ssm to mamba (#2740) Billel Mokeddem 2024-11-15 16:15:36 +0400
8442f1ac85

benchmark: fix prefill throughput (#2741) Daniël de Kok 2024-11-15 13:14:55 +0100
ca4f46ddfc

nix: update nixpkgs (#2746) Daniël de Kok 2024-11-14 18:48:20 +0100
c908aab440 nix: update nixpkgs Daniël de Kok 2024-11-14 16:33:04 +0000
23d2bcf28d misc(build): improve build process Morgan Funtowicz 2024-11-14 09:38:13 +0100
70c90ad933 feat(backend): update llamacpp to 4077 Morgan Funtowicz 2024-11-14 09:04:06 +0100
6f059c4b5d feat(backend): wrap Arc tokenizer to avoid duplicating Morgan Funtowicz 2024-11-14 08:41:38 +0100
57b215467b feat(backend): simplify Rust callback Morgan Funtowicz 2024-11-13 00:22:11 +0100