Commit Graph

  • 4498d6bc47 run python3 udpate_doc.py erikkaum 2024-07-31 12:04:22 +0200
  • 4a0fdad1a7 changes based on feedback erikkaum 2024-07-31 11:55:09 +0200
  • 00478579e3 Update router/src/server.rs Erik Kaunismäki 2024-07-31 11:50:25 +0200
  • f4f0cb81f2 Update docs/source/usage_statistics.md Erik Kaunismäki 2024-07-31 11:43:43 +0200
  • 5310ba0119 refactor usage stats erikkaum 2024-07-31 11:29:19 +0200
  • 31ebfd0dd7 (launcher) default new server::run parameters to false for now Morgan Funtowicz 2024-07-31 09:06:52 +0000
  • 8989c585c6 (docker) build ompi with SLURM support Morgan Funtowicz 2024-07-31 09:06:24 +0000
  • ae66cf5593 (docker) let's put rust in the TRTLLM folder when building Morgan Funtowicz 2024-07-31 09:06:11 +0000
  • 2b19d671b4 Rebase TRT-llm (#2331) Nicolas Patry 2024-07-31 10:33:10 +0200
  • 2c890d4cdf enable HuggingFaceM4/idefics-9b in intel gpu Wang, Yi A 2024-07-31 01:31:02 -0700
  • f5f09ae9a8 Handle GPTQ-Marlin loading in GPTQMarlinWeightLoader Daniël de Kok 2024-07-24 14:36:52 +0000
  • 3d21c8f43a reable gemma2 in xpu Wang, Yi A 2024-07-30 22:20:49 -0700
  • 4d28e29236 hotfix: fix xpu crash brought by code refine. torch.xpu rely on import ipex Wang, Yi A 2024-07-30 19:28:59 -0700
  • 5123925101 fix: warn window_size_left when using flash attn 1 drbh 2024-07-30 20:24:48 +0000
  • 4b1005c7e1 fix: attempt forward on flash attn2 to check hardware support drbh 2024-07-30 17:20:40 +0000
  • 6e564a30a2 link against libtensorrt_llm and not libtensorrt-llm Morgan Funtowicz 2024-07-30 17:01:38 +0000
  • 98739b2035 provided None for api_key Morgan Funtowicz 2024-07-30 17:01:07 +0000
  • 579199f6f2 update TensorRT-LLM to latest version Morgan Funtowicz 2024-07-30 17:00:44 +0000
  • 5c81a1713c Fixing PB with default member backends/client Nicolas Patry 2024-07-30 18:45:17 +0200
  • dc2feb4e6f Remove PB from git. Nicolas Patry 2024-07-30 18:42:55 +0200
  • 9357fc162a Backporting 457fb0a1 Nicolas Patry 2024-07-30 18:21:11 +0200
  • bbdd26e2be Backporting telemetry. Nicolas Patry 2024-07-30 18:18:39 +0200
  • b2edffabb9 Remove both check + clippy ? Nicolas Patry 2024-07-30 17:02:41 +0200
  • f9d4a08f21 Tmp. Nicolas Patry 2024-07-30 17:00:05 +0200
  • 3e19ce117c ? Nicolas Patry 2024-07-30 16:52:31 +0200
  • 2641c853ad Remove cargo fmt temporarily. Nicolas Patry 2024-07-30 16:38:38 +0200
  • 1dbcf7532e Adding pb files ? Nicolas Patry 2024-07-30 16:22:10 +0200
  • db17050c22 Fix trtllm lint. Nicolas Patry 2024-07-30 16:21:07 +0200
  • e3418c3340 Updating the schema thing + redocly. Nicolas Patry 2024-07-30 16:14:52 +0200
  • fa687dd340 Fix makefile + autodocs. Nicolas Patry 2024-07-30 15:40:02 +0200
  • 2611c1a55f Fixing client. Nicolas Patry 2024-07-30 15:27:57 +0200
  • 53aec27328 server quantize: store quantizer config in standard format (#2299) Daniël de Kok 2024-07-30 15:16:20 +0200
  • ad7d8b3432 Ignore backends/v3 by default. Nicolas Patry 2024-07-30 12:47:34 +0200
  • f6b60bab73 Let's try to enable trtllm backend. Nicolas Patry 2024-07-30 12:37:12 +0200
  • 33c4b0d8c3 Fix autodocs. Nicolas Patry 2024-07-30 12:35:12 +0200
  • bc0a33e1c9 Rebase. Nicolas Patry 2024-07-30 12:22:24 +0200
  • 05c13c89de Remove useless modification yuanwu 2024-07-30 10:05:38 +0000
  • ddbbf6b50c wip OlivierDehaene 2024-06-26 12:08:56 +0200
  • 8fad7ae5a2 add some more basic info in README.md backends/trtllm-executor Morgan Funtowicz 2024-07-30 08:45:29 +0000
  • b665e2fa0a look for cuda 12.5 Morgan Funtowicz 2024-07-30 08:45:20 +0000
  • 67c0b5eb6d add numa to improve cpu inference perf Wang, Yi A 2024-07-30 00:03:23 -0700
  • 3f0f0e0825 Add the habana profiler yuanwu 2024-07-30 03:53:46 +0000
  • db0b6567e1 Remove log yuanwu 2024-07-29 22:02:42 +0000
  • 588a014551 Enable llava-next yuanwu 2024-07-28 09:05:49 +0000
  • 0986835548 fix: remove global model id drbh 2024-07-29 16:58:42 +0000
  • a014b220e1 MODEL_ID propagation fix root 2024-07-24 03:35:53 +0000
  • 2ce1476e52 fix: remove global model id drbh 2024-07-29 16:51:53 +0000
  • c2413a0153 MODEL_ID propagation fix root 2024-07-24 03:35:53 +0000
  • 95ff267043 fix: remove global model id drbh 2024-07-29 16:30:53 +0000
  • 2aa9e3c23d Merge commit 'refs/pull/2290/head' of github.com:huggingface/text-generation-inference into main drbh 2024-07-29 16:28:42 +0000
  • 3fb74e4626 fix: remove global model id drbh 2024-07-29 16:22:38 +0000
  • 1246e2193f Merge 592ea3f2f8 into 0b95693fb8 Edwin Hernandez 2024-07-29 11:22:36 -0500
  • c954a5c92a Merge branch 'pr-2290' into pr-2290-ci drbh 2024-07-29 16:19:28 +0000
  • 0b95693fb8 fix: adjust test snapshots and small refactors (#2323) pr-2290-ci-runner drbh 2024-07-29 11:38:38 -0400
  • 3d7f4f41bb patch-error-on-invalid-grammar (#2282) Erik Kaunismäki 2024-07-29 16:09:25 +0200
  • f15e808d4c fix: reject grammars without properties (#2309) drbh 2024-07-29 10:07:25 -0400
  • b5f61e92b5 fix: revert non snapshot changes drbh 2024-07-29 14:05:55 +0000
  • 922732b255 Install Marlin from standalone package (#2320) Daniël de Kok 2024-07-29 15:37:10 +0200
  • 8e4a3b8dd7 Install Marlin from standalone package Daniël de Kok 2024-07-26 14:22:13 +0000
  • 583d37a2f8 Run ci api key (#2315) Erik Kaunismäki 2024-07-29 11:14:17 +0200
  • 4f69d04c3a hotfix: increase precision of GPTQ/AWQ-Marlin Daniël de Kok 2024-07-29 08:40:17 +0000
  • fd2e06316d fix: fix buildkit config in ci Adrien 2024-07-29 09:25:56 +0200
  • 68854d11ef fix: adjust test snapshots and small refactors drbh 2024-07-28 22:58:12 +0000
  • bab02ff2bc feat: add ruff and resolve issue (#2262) drbh 2024-07-26 10:29:09 -0400
  • b97684536f explicit todo that this is only short term erikkaum 2024-07-26 15:50:35 +0200
  • 4b49c50f4c Support tied embeddings in 0.5B and 1.5B Qwen2 models (#2313) Daniël de Kok 2024-07-26 14:57:24 +0200
  • 6609feec64 revert wrong update erikkaum 2024-07-26 11:39:22 +0200
  • 12381b0b0e delete the last no repeat processor from warpers feature/no_repeat_ngram_size erikkaum 2024-07-25 17:31:04 +0200
  • 135be1f5c7 update docs again erikkaum 2024-07-26 11:09:11 +0200
  • fcb2dfb683 fixes and update docs erikkaum 2024-07-26 11:00:27 +0200
  • 21efb19c13 changes from original branch erikkaum 2024-07-26 10:59:09 +0200
  • 169c8c2cf5 token.to_str() returns result add_api_key erikkaum 2024-07-26 10:52:55 +0200
  • b890c8c47d Update vLLM dependency to 0.5.3.post1 Daniël de Kok 2024-07-26 08:51:32 +0000
  • cd01adcdee token.to_str() returns result erikkaum 2024-07-26 10:27:43 +0200
  • cd2508c19f Support tied embeddings in 0.5B and 1.5B Qwen2 models Daniël de Kok 2024-07-26 08:18:30 +0000
  • 28ae96b28b fix: reject grammars without properties drbh 2024-07-25 17:12:38 +0000
  • 80d1868ecf Fix registry name (#2307) Adrien 2024-07-25 16:06:00 +0200
  • 22d2341fb8 Fixing idefics on g6 tests. (#2306) Nicolas Patry 2024-07-25 14:44:21 +0200
  • 70465b23e2 Some small fixes for the Torch 2.4.0 update (#2304) Daniël de Kok 2024-07-25 13:34:44 +0200
  • 3fe117f492 Using g6 instead of g5. (#2281) Nicolas Patry 2024-07-25 11:21:17 +0200
  • ec4054487e fix: refactor adapter weight loading and mapping (#2193) drbh 2024-07-24 15:32:14 -0400
  • 8ed01b16dc Split up layers.marlin into several files (#2292) Daniël de Kok 2024-07-24 16:33:26 +0200
  • 7c7a0e9897 fix of use of unquantized weights in cohere GQA loading, also enable … (#2291) Wang, Yi 2024-07-24 16:44:02 +0800
  • 9e7c515489 fix crash in multi-modal (#2245) Wang, Yi 2024-07-24 16:39:08 +0800
  • 57ecf0b78a hotfix: update nccl OlivierDehaene 2024-07-23 23:31:28 +0200
  • bc076dceb6 chore: update to torch 2.4 (#2259) OlivierDehaene 2024-07-23 20:39:43 +0000
  • 0eca032d04 hotfix: pin numpy (#2289) Daniël de Kok 2024-07-23 17:53:19 +0200
  • cb3b8fddba Add support for Llama 3 rotary embeddings (#2286) Daniël de Kok 2024-07-23 17:18:54 +0200
  • 6dd74a3321 Preparing for release. (#2285) Nicolas Patry 2024-07-23 16:20:17 +0200
  • 7c874e5c4f [WIP] Add support for Mistral-Nemo by supporting head_dim through config (#2254) shaltielshmid 2024-07-23 16:00:07 +0300
  • 9ae43a1bee Add support for repacking AWQ weights for GPTQ-Marlin (#2278) Daniël de Kok 2024-07-23 13:08:20 +0200
  • 6b6dce2a4e fix(l4): fix fp8 logic on l4 (#2277) OlivierDehaene 2024-07-23 09:24:29 +0000
  • d3ebcdc424 Fixing mistral nemo. (#2276) Nicolas Patry 2024-07-23 11:16:03 +0200
  • 4bf3e5971b use proper name for ci (#2274) Adrien 2024-07-22 21:50:53 +0200
  • 800d8de688 Softcapping for gemma2. (#2273) Nicolas Patry 2024-07-22 18:27:10 +0200
  • 9c0c652b4c fix(server): fix fp8 weight loading (#2268) OlivierDehaene 2024-07-22 15:51:32 +0000
  • bca1d8669a fix(ci): test new instances (#2272) Adrien 2024-07-22 14:41:30 +0200
  • f3b0c2f3d9 legacy warning on text_generation client (#2271) Erik Kaunismäki 2024-07-22 12:00:17 +0200
  • 2ca2fd6f56 Hotfix: fix of use of unquantized weights in Mixtral GQA loading (#2269) icyboy™ 2024-07-22 17:31:00 +0800
  • 3a5f11ebb4 fix(server): fix deepseekv2 loading (#2266) OlivierDehaene 2024-07-21 16:48:04 +0000