text-generation-inference

huggingface/text-generation-inference

Fork 0

mirror of https://github.com/huggingface/text-generation-inference.git synced 2025-09-12 04:44:52 +00:00

Commit Graph

Select branches

Hide Pull Requests

20250708-ci-fixes

add-chat-response-format

add-google-cloud-provider

add-quickstart-script

add-rotary-embed-tests

add-small-ttft-script

add-test-for-warmup-and-kvcache

add_L4

add_api_key

add_batch_dimension

add_chunked_atn

add_chunked_attn

add_deepseekv3

add_gptq_docs

add_integration_test

add_readme_dashboard

add_tunable_prefill

add_vlm_chunking

adding_docs

adjust-mllama-test-output

adjust-where-request-max-tokens-is-defaulted

aiter_kernels

amd-ci-fx

auto_length

automodel-supports-flash-paged-attention

avoid-cuda-graph-during-warmup-if-oom

avoid-zero-seed

backends/trtllm

backends/trtllm-executor

baichuan2-13b

bnb4

bugfix/add_tools_prompt

bugfix/moe-kernels-imports

bugfix/phi-exl2

bump-client-0.6.2

bump-kernel-versions

bump-poetry-and-requirements

chunked_attn_l4

ci-amihalik-update-chat-completion-messages

ci-new-cluster

ci-patch

ci-run-openai-function-calling-compatible-support

ci-update_xpu_image

ci-xpu

ci-xpu2

ci2

ci_amd

ci_amd2

ci_amd3

ci_amd4

close_dl_thread

compat_logger

cuda_ipc_allreduce

debug-gpt2

debug-request-id

debug-torch-23

debug/gemma2

debugging-timeouts

deploy/aml

dev

development-guide

dummy

enable-non-grammar-constrained-tools

enable-qwen2vl-video

enable-transformers-vlm

enable_non_divisible_embeddings

exl2

experiment/moe

explore-static-triton-kernels

explore-t4-gemma-issues

feat-backend-llamacpp

feat/add-load-test

feat/attention_sinks

feat/backend_abstraction

feat/backend_feature

feat/better_tokens

feat/cuda_12

feat/flash_decoding

feat/improve_max_tokens

feat/max_queue_size

feat/page_re_alloc

feat/parse_logs

feat/support_deepspeed

feature/machete

feature/moe-kernels

feature/no_repeat_ngram_size

feature/no_repeat_ngram_size_ci

feature/phi-3-small

feature/prefix

feature/radix-prefix-cache

feature/radix-prefix-cache-bench

feature/vlm-prefix-caching

fix-cudagraph-bug

fix-gemma-tokenization

fix-grammar-cleanup-bug

fix-grammar-fsm-batching

fix-mixtral-adapter-loading

fix-release-tests

fix-repack-for-marlin

fix-tool-call-def

fix-tp

fix-version-install

fix/allow-top-p-0

fix/avoid_record_streams

fix/op-trace-id

fix/parse-mamba-config

fix_default_arg

fix_exl2

fix_fp8_llama3.2

fix_leak

fix_mistral2

fix_neox_rotary_emb

fix_phi3

flashinfer

flashinfer-0.2.5

fp8_kvcache

fp8_rocm

gaudi_llama4_tmp

gha_sccache_use_secrets

git_2.0.4

git_3.1.1

git_3.2.0

git_3.2.1

git_3.3.0

git_3.3.1

git_3.3.2

git_v2.1.0

git_v2.1.1

git_v2.2.0

git_v2.3.0

git_v2.3.1

git_v2.4.0

git_v2.4.1

git_v3.0.0

git_v3.0.1

git_v3.0.2

git_v3.1.0

git_v3.2.2

git_v3.2.3

git_v3.3.3

git_v3.3.4

git_v3.3.5

improve-docs

improve-dynamic-message-content

improve-json-schema-field

improve-tool-call-and-response-ids

improve_defaults

improve_launcher_defaults

inlcude-latest-release-on-commit-builds-tags

ipex-moe

kvrouter

kvrouter-endpoints

llama-fused-compiled-mlp

main

maintenance/docker-network

maintenance/merge-vlm-input-prep

mamba2

martinigoyanes-fix-frequency-penalty

medusa

megatron

message-more-info

mi300-temp

mllama

model_compat_log

more_logs

multi-lora

new_minor_version

nix/cargo-clippy

nix/docker2

nix/pytorch-2.5.1

nix_integration_tests

nix_test2

no_root_user

no_root_user2

op-compilation-benchmarking

origin/slind_window_fix

osanseviero-patch-1

patch_version_3.3.6

pip-installable

pr-1869-ci-run

pr-2076-ci-run

pr-2290-ci-runner

pr-2366-ci-branch

pr-2444-ci-branch

pr-2517-ci-branch

pr-2711-ci-branch

pr-2784-ci-branch

pr-2840-ci-branch

pr-2954-ci-branch

pr-3002-ci-branch

pr-3004-ci-branch

pr-3018-ci-branch

precompile-kernels-workflow

prefix_chunk

prefix_default

proxy_sse_engine_state

quantization

quantization-0.1

refactor-lora-linear

release-3.2.4

remove_post_load_weights

response-header-metrics

revert

rocm-ci-build

rocm_6.2_fixes

router-grammar-compile

s3-cache

self-generating-docs

set-num-blocks

simpler_exllama

skip-mistral-test

speculative

streaming_conceptual

support-granite-vision

support-logit-bias-in-chat

support-phi-model

support-phi3-small

support-pre-compile-kernels

temp_work

test-batch-speedup-amount

test_docs

test_rocm

tmp_invariants

tmp_medusa

tmp_torch_compile

transformers-ci

triton_fix

trtllm-stop-words

trtllm/executor_stats

tuna

update-jsonschema

update_docs2

update_internal_version

update_peft

update_readme

upgrade-outlines

upgrade_mlp_speculator

use_g6

use_updated_kernels

vllm/setup

zstd

#1

#100

#101

#1010

#1018

#1019

#102

#1022

#1023

#1024

#103

#1033

#1034

#1042

#1044

#1045

#1048

#1049

#1052

#1054

#1058

#1059

#106

#1060

#1061

#1063

#1064

#1065

#1066

#1068

#107

#1070

#1071

#1075

#1076

#1077

#108

#1080

#1081

#1089

#109

#1090

#1091

#1092

#1094

#1096

#1097

#1099

#11

#110

#1100

#1101

#1102

#1103

#1105

#1110

#1112

#1116

#1123

#1128

#1134

#114

#1140

#1141

#115

#1153

#1155

#116

#1165

#1165

#117

#1173

#1176

#1178

#1179

#118

#1182

#1183

#1184

#1187

#119

#1198

#1202

#1211

#1214

#1219

#122

#1224

#1228

#123

#1239

#1241

#1242

#1243

#1246

#1252

#126

#1260

#1267

#1270

#1272

#1274

#1276

#1279

#128

#1285

#1287

#129

#1294

#1295

#13

#130

#1301

#1305

#1307

#1308

#1313

#132

#1326

#1328

#133

#1336

#1337

#134

#1341

#1343

#1346

#1347

#1348

#135

#1351

#1352

#1353

#1358

#136

#1361

#1364

#137

#1370

#1373

#138

#1381

#1386

#139

#1390

#1395

#14

#140

#1408

#141

#1414

#1419

#142

#1420

#1424

#1425

#1427

#1428

#143

#1436

#144

#1442

#1448

#145

#1450

#1453

#1454

#1455

#1459

#1461

#1462

#1463

#1469

#147

#1470

#1471

#1473

#1475

#1476

#1477

#1478

#148

#1480

#1484

#1486

#1488

#1489

#149

#1490

#1491

#1492

#1494

#1495

#1496

#1497

#1498

#15

#150

#1502

#1504

#1505

#1506

#151

#1511

#1512

#1514

#1515

#1516

#1517

#1518

#152

#1520

#1523

#1524

#1526

#1527

#153

#1532

#1533

#1537

#1539

#154

#1540

#1541

#1542

#1543

#155

#1550

#1552

#1555

#1556

#1557

#1560

#1563

#1564

#1567

#1568

#1569

#1570

#1571

#1576

#1577

#1578

#1579

#1580

#1583

#1584

#1585

#1586

#1587

#1588

#159

#1591

#1592

#1594

#16

#160

#1603

#1605

#1606

#1607

#1608

#1609

#161

#1610

#1614

#1617

#1618

#1619

#162

#1621

#1626

#1628

#163

#1632

#1637

#1638

#1639

#164

#1646

#1648

#1650

#1651

#1653

#1658

#1660

#1662

#1663

#1664

#1666

#1667

#1668

#167

#1676

#168

#1682

#1685

#1686

#1693

#1697

#1698

#17

#170

#1702

#1703

#1704

#1707

#1708

#1709

#1710

#1713

#1714

#1715

#1716

#1718

#1719

#1726

#1727

#1729

#173

#1730

#1731

#1734

#1735

#1736

#1737

#1739

#174

#1740

#1747

#1748

#1749

#175

#1751

#1755

#1756

#1758

#1759

#1760

#1764

#1765

#1767

#1768

#1773

#1774

#1775

#178

#1784

#1789

#179

#1790

#1791

#1797

#1798

#18

#180

#1800

#1801

#1808

#181

#1811

#1812

#1813

#1815

#1816

#1817

#1818

#1820

#1825

#1827

#1828

#1829

#183

#1830

#1832

#1833

#1835

#1836

#1839

#184

#1840

#1841

#1843

#1844

#1845

#1848

#1849

#185

#1850

#1851

#1854

#1855

#186

#1860

#1865

#1866

#1869

#187

#1882

#1884

#1886

#1888

#1889

#1890

#1892

#1894

#1895

#1898

#19

#190

#1902

#1906

#1908

#1909

#191

#1910

#1912

#1915

#1916

#1917

#1918

#1919

#1920

#1921

#1923

#1924

#1925

#1929

#193

#1931

#1932

#1934

#1935

#1936

#1937

#1938

#1939

#194

#1940

#1942

#1947

#1948

#1949

#1950

#1951

#1953

#1954

#1958

#1959

#196

#1963

#1965

#1967

#1970

#1971

#1975

#1980

#1981

#1985

#1986

#1988

#1989

#1990

#1994

#1995

#1996

#2

#20

#200

#2002

#2003

#2004

#2005

#2006

#2008

#201

#2010

#2011

#2013

#2014

#2015

#2016

#2017

#202

#2020

#2021

#2022

#2023

#2024

#2028

#203

#2031

#2032

#2033

#2034

#2038

#2039

#2044

#2045

#2046

#2047

#2049

#205

#2050

#2052

#2054

#2059

#2060

#2061

#2062

#2063

#2065

#2066

#2068

#207

#2071

#2072

#2074

#2075

#2076

#2078

#2079

#208

#2080

#2084

#2085

#2086

#2087

#2088

#2089

#2091

#2092

#2094

#2095

#2097

#2099

#210

#2101

#2102

#2103

#2104

#2105

#2109

#2110

#2111

#2114

#2115

#2116

#2117

#2118

#2119

#212

#2120

#2123

#2124

#2125

#2127

#2128

#2129

#213

#2131

#2132

#2133

#2134

#2135

#2137

#2138

#214

#2140

#2141

#2142

#2148

#2149

#215

#2151

#2152

#2153

#2155

#2156

#2158

#216

#2161

#2163

#2164

#2165

#2166

#2167

#2168

#2169

#217

#2170

#2173

#2175

#2176

#2178

#2179

#218

#2180

#2181

#2182

#2186

#2187

#2189

#219

#2190

#2191

#2193

#2194

#2196

#22

#220

#2201

#2202

#2203

#2204

#2205

#2208

#2209

#221

#2212

#2213

#2215

#2216

#2217

#222

#2220

#2221

#2224

#2225

#2226

#2228

#2230

#2231

#2232

#2233

#2237

#2242

#2243

#2244

#2245

#2248

#2249

#2250

#2251

#2254

#2255

#2256

#2257

#2258

#2259

#226

#2260

#2261

#2262

#2266

#2268

#2269

#227

#2271

#2272

#2273

#2274

#2276

#2277

#2278

#2279

#228

#2281

#2282

#2283

#2284

#2285

#2286

#2287

#2288

#2289

#2290

#2291

#2292

#2298

#2299

#23

#2300

#2303

#2304

#2306

#2307

#2308

#2309

#2311

#2311

#2313

#2314

#2315

#2317

#2320

#2323

#2325

#2326

#2327

#2328

#2329

#233

#2330

#2331

#2333

#2335

#2336

#2337

#2338

#2339

#234

#2341

#2342

#2343

#2344

#2345

#2346

#2347

#235

#2350

#2351

#2352

#2353

#2354

#2357

#2358

#2359

#2360

#2361

#2364

#2365

#2366

#2367

#2368

#237

#2370

#2371

#2372

#2374

#2377

#2378

#2379

#2381

#2382

#2384

#2385

#2386

#2387

#2389

#2390

#2391

#2392

#2394

#2395

#2396

#2397

#2398

#2399

#24

#2401

#2402

#2403

#2404

#2405

#2406

#2407

#2408

#2410

#2411

#2412

#2414

#2415

#2416

#2419

#242

#2420

#2422

#2423

#2424

#2426

#2427

#2428

#2429

#2430

#2431

#2433

#2437

#2438

#2439

#244

#2442

#2443

#2444

#2449

#2450

#2451

#2453

#2454

#2455

#2459

#246

#2462

#2463

#2468

#2469

#2470

#2471

#2472

#2473

#2477

#2478

#2479

#248

#2481

#2482

#2484

#2486

#2489

#2490

#2491

#2492

#2493

#2494

#2496

#2497

#2498

#2499

#25

#250

#2500

#2501

#2507

#251

#2510

#2511

#2512

#2513

#2514

#2515

#2516

#2517

#2518

#2519

#252

#2520

#2521

#2524

#2525

#2527

#2528

#2529

#2532

#2533

#2535

#2536

#2537

#2538

#2539

#2540

#2545

#2546

#2547

#2548

#255

#2550

#2551

#2552

#2553

#2554

#2555

#2556

#2557

#2558

#2561

#2562

#2563

#2566

#2567

#2568

#257

#2574

#2575

#2577

#2578

#2579

#258

#2580

#2582

#2585

#2586

#2587

#2588

#2589

#259

#2590

#2591

#2592

#2594

#2595

#2596

#2597

#2599

#26

#2600

#2601

#2602

#2603

#2604

#2605

#2606

#2607

#2609

#261

#2610

#2611

#2612

#2614

#2616

#2617

#2619

#262

#2620

#2622

#2623

#2625

#2627

#2628

#2629

#2630

#2631

#2632

#2633

#2634

#2637

#264

#2640

#2642

#2642

#2645

#2646

#2647

#2648

#2650

#2651

#2652

#2655

#2658

#2659

#266

#2661

#2663

#2664

#2665

#2666

#2668

#267

#2673

#2674

#2677

#2678

#2680

#2682

#2683

#2684

#2685

#2686

#2687

#2688

#2689

#269

#2690

#2691

#2692

#2693

#2694

#2695

#2697

#2698

#2699

#27

#2701

#2702

#2704

#2706

#2707

#2708

#2709

#2710

#2711

#2712

#2713

#2714

#2716

#2717

#2718

#2719

#272

#2720

#2721

#2723

#2724

#2725

#2726

#2732

#2733

#2738

#274

#2740

#2741

#2742

#2743

#2745

#2746

#275

#2750

#2751

#2753

#2754

#2755

#2756

#2756

#2758

#276

#2760

#2761

#2762

#2764

#2765

#2766

#2767

#2768

#2769

#277

#2770

#2771

#2772

#2773

#2774

#2777

#2778

#2779

#278

#2782

#2784

#2785

#2786

#2788

#2789

#2790

#2791

#2792

#2793

#2795

#2796

#2797

#2798

#2799

#28

#2802

#2803

#2806

#2807

#2808

#2809

#2810

#2811

#2812

#2814

#2815

#2816

#2817

#2818

#282

#2824

#2825

#2826

#2827

#2829

#2831

#2833

#2834

#2837

#284

#2840

#2841

#2845

#2846

#2848

#2848

#2849

#285

#2850

#2852

#2855

#2856

#2858

#2859

#286

#2861

#2862

#2863

#2866

#2866

#287

#2870

#2874

#2874

#2878

#2882

#2883

#2884

#2885

#2886

#2891

#2892

#2893

#2898

#29

#2901

#2903

#2904

#2905

#2907

#2908

#2910

#2911

#2912

#2913

#2916

#2917

#2918

#2919

#292

#2921

#2922

#2924

#2928

#2929

#2931

#2932

#2933

#2935

#2936

#2937

#2938

#294

#2940

#2941

#2942

#2943

#2945

#2947

#2948

#2949

#2950

#2951

#2953

#2954

#2957

#2958

#2960

#2961

#2962

#2963

#2964

#2964

#2965

#2965

#2966

#2967

#2968

#297

#2970

#2971

#2972

#2975

#2976

#2976

#2977

#2979

#298

#2980

#2981

#2982

#2983

#2988

#299

#2990

#2991

#2992

#2993

#2995

#2996

#2998

#2999

#30

#3000

#3001

#3002

#3003

#3004

#3006

#3007

#3008

#3010

#3012

#3012

#3013

#3015

#3016

#3017

#3018

#302

#3021

#3022

#3024

#3027

#3028

#3029

#303

#3030

#3031

#3032

#3033

#3034

#3034

#3035

#3036

#3037

#3039

#304

#3040

#3041

#3042

#3044

#3045

#3046

#3047

#3048

#3049

#305

#3051

#3053

#3054

#3055

#3056

#3057

#3060

#3061

#3063

#3063

#3064

#3064

#3065

#3065

#3067

#3068

#3069

#3072

#3073

#3074

#3075

#3076

#3077

#3078

#3079

#3079

#308

#3080

#3081

#3083

#3084

#3085

#3086

#3089

#3091

#3092

#3093

#3094

#3095

#3096

#3098

#3099

#31

#310

#3100

#3101

#3103

#3106

#3107

#3108

#3109

#3111

#3111

#3112

#3113

#3116

#3117

#3118

#3120

#3121

#3122

#3124

#3125

#3126

#3127

#3128

#3129

#313

#3131

#3132

#3134

#3136

#3141

#3142

#3144

#3145

#3147

#3148

#3150

#3151

#3152

#3154

#3154

#3156

#3157

#3160

#3161

#3162

#3162

#3164

#3164

#3166

#3166

#3167

#317

#3170

#3171

#3172

#3174

#3176

#3179

#318

#3182

#3184

#3186

#3186

#3187

#3188

#3189

#3193

#3194

#3194

#3196

#3197

#3197

#32

#3200

#3201

#3201

#3202

#3204

#3206

#3207

#3210

#3210

#3211

#3217

#3218

#3220

#3221

#3222

#3223

#3224

#3229

#3230

#3231

#3231

#3232

#3234

#3235

#3236

#3237

#3238

#3239

#3240

#3242

#3244

#3245

#3246

#3249

#325

#3252

#3253

#3254

#3255

#3255

#3256

#3258

#3260

#3261

#3262

#3263

#3264

#3265

#3266

#3266

#3267

#3268

#3269

#327

#3270

#3271

#3273

#3274

#3276

#3276

#328

#3280

#3281

#3282

#3283

#3284

#3286

#3287

#3288

#3288

#329

#3291

#3291

#3292

#3292

#3297

#3298

#33

#3300

#3300

#3302

#3302

#3308

#3310

#3310

#3311

#3312

#3313

#3314

#3315

#3315

#3319

#3322

#3323

#3324

#3325

#3326

#3327

#334

#335

#336

#34

#340

#341

#343

#344

#348

#35

#351

#352

#353

#356

#357

#358

#359

#36

#360

#362

#363

#364

#367

#368

#37

#370

#373

#379

#384

#385

#388

#39

#393

#394

#395

#396

#4

#40

#400

#404

#406

#407

#41

#411

#412

#42

#434

#438

#44

#441

#443

#45

#453

#46

#462

#465

#47

#470

#472

#475

#477

#48

#480

#483

#485

#488

#49

#498

#5

#50

#501

#502

#51

#513

#514

#516

#519

#52

#520

#521

#522

#525

#529

#53

#534

#54

#543

#544

#545

#55

#550

#553

#557

#558

#56

#561

#562

#567

#57

#575

#578

#579

#58

#580

#581

#582

#583

#585

#586

#587

#588

#59

#590

#595

#596

#6

#60

#600

#605

#608

#609

#61

#611

#616

#617

#618

#619

#62

#621

#623

#624

#626

#63

#630

#633

#634

#635

#639

#64

#642

#643

#647

#648

#659

#66

#661

#664

#665

#666

#67

#670

#671

#678

#68

#684

#689

#698

#7

#70

#704

#708

#71

#712

#713

#715

#719

#72

#721

#723

#725

#727

#73

#733

#737

#738

#740

#741

#743

#745

#746

#748

#75

#750

#76

#761

#762

#767

#768

#770

#773

#783

#785

#789

#791

#793

#794

#795

#797

#798

#799

#8

#803

#805

#806

#809

#810

#812

#82

#820

#821

#822

#823

#829

#831

#836

#838

#84

#842

#848

#85

#851

#852

#853

#854

#858

#86

#860

#862

#867

#868

#87

#872

#88

#881

#884

#886

#889

#89

#892

#893

#898

#9

#90

#900

#901

#905

#906

#91

#910

#911

#918

#921

#93

#930

#932

#935

#94

#941

#947

#95

#950

#951

#953

#954

#957

#958

#96

#963

#964

#966

#968

#97

#971

#977

#981

#986

#989

#990

#993

#994

#999

v0.2.0

v0.2.1

v0.3.0

v0.3.1

v0.3.2

v0.4.0

v0.4.1

v0.4.2

v0.4.3

v0.5.0

v0.6.0

v0.7.0

v0.8.0

v0.8.1

v0.8.2

v0.9.0

v0.9.1

v0.9.2

v0.9.3

v0.9.4

v1.0.0

v1.0.1

v1.0.2

v1.0.3

v1.1.0

v1.1.1

v1.2.0

v1.3.0

v1.3.1

v1.3.2

v1.3.3

v1.3.4

v1.4.0

v1.4.1

v1.4.2

v1.4.3

v1.4.4

v1.4.5

v2.0.0

v2.0.1

v2.0.2

v2.0.3

v2.0.4

v2.1.0

v2.1.1

v2.2.0

v2.3.0

v2.3.1

v2.4.0

v2.4.1

v3.0.0

v3.0.1

v3.0.2

v3.1.0

v3.1.1

v3.2.0

v3.2.1

v3.2.2

v3.2.3

v3.3.0

v3.3.1

v3.3.2

v3.3.3

v3.3.4

v3.3.5

7e810e7628 fix: update client exports and adjust after rebase drbh 2024-07-19 17:18:33 +0000
80ab61c013 feat: add ruff and resolve issue drbh 2024-07-19 16:02:20 +0000
5d85a958c9

fix: refactor adapter weight loading and mapping (#2193) drbh 2024-07-24 15:32:14 -0400
631ca319f3

Update the idefics2 snapshot. Nicolas Patry 2024-07-24 19:11:08 +0200
93d2b9fe9c

Split up layers.marlin into several files (#2292) Daniël de Kok 2024-07-24 16:33:26 +0200
9f9997b5d4

convert strings to lowercase for case insensitive comparison KevinDuffy94 2024-07-24 10:32:48 -0400
19e63ffccc

Fix comment KevinDuffy94 2024-07-24 10:27:27 -0400
75b55efcc7 server quantize: store quantizer config in standard format Daniël de Kok 2024-07-24 13:27:20 +0000
82c7f951f2 fix: comment typo drbh 2024-07-24 13:26:44 +0000
1f3b2aeee4 fix: improve get_model_with_lora_adapters naming drbh 2024-07-24 13:25:24 +0000
5f2e1f0d7e fix: fix missing model id i rocm warmup Islam Almersawi 2024-07-24 17:22:44 +0400
fbb683fce7 fix aliases matvey-kolbasov-hs 2024-07-24 16:05:38 +0300
eabcb2967a logging matvey-kolbasov-hs 2024-07-24 15:37:16 +0300
48315e2608 clean up a bit Morgan Funtowicz 2024-07-24 09:52:38 +0000
9c60c9ca43 add missing dependant libraries for linking Morgan Funtowicz 2024-07-24 09:29:24 +0000
f73f57ca21 tied embeddings for qwe2 matvey-kolbasov-hs 2024-07-24 11:59:56 +0300
8642250602

fix of use of unquantized weights in cohere GQA loading, also enable … (#2291) Wang, Yi 2024-07-24 16:44:02 +0800
272e6f987f Split up layers.marlin into several files Daniël de Kok 2024-07-24 08:22:54 +0000
5ad39dd3c3

fix crash in multi-modal (#2245) Wang, Yi 2024-07-24 16:39:08 +0800
09bcca6a97 update build.rs to link to cuda 12.5 Morgan Funtowicz 2024-07-24 07:50:26 +0000
0c651ac7be fix of use of unquantized weights in cohere GQA loading, also enable the model in intel platform Wang, Yi A 2024-07-23 23:07:21 -0700
02b0eaaba0 MODEL_ID propagation fix root 2024-07-24 03:35:53 +0000
e4fc0ebcbe update TensorRT install script to latest Morgan Funtowicz 2024-07-23 22:23:30 +0000
03935f6705 update TensorRT-LLM to latest version Morgan Funtowicz 2024-07-23 22:13:02 +0000
ef1876346c refactor the compute capabilities detection along with num gpus Morgan Funtowicz 2024-07-23 22:12:42 +0000
a895029424

hotfix: update nccl OlivierDehaene 2024-07-23 23:31:28 +0200
344427b6ab

feat(router): drop permit after batching feat/max_queue_size OlivierDehaene 2024-07-23 14:45:30 +0200
e7e3aa6cac

chore: update to torch 2.4 (#2259) OlivierDehaene 2024-07-23 20:39:43 +0000
9491c155bb

fix OlivierDehaene 2024-07-23 21:40:42 +0200
0c7910f7bc

remove un-necessary patch OlivierDehaene 2024-07-23 19:40:10 +0200
0e527ae106

chore: update to torch 2.4 OlivierDehaene 2024-07-19 15:15:18 +0200
d3fc28ebe7 no-repeat-ngram is processor not warper Nathan Brake 2024-07-23 12:44:50 -0400
db7e043ded New version. v2.2.0 git_v2.2.0 Nicolas Patry 2024-07-23 17:25:18 +0200
bc9593a5b1

hotfix: pin numpy (#2289) Daniël de Kok 2024-07-23 17:53:19 +0200
c370636cba hotfix: pin numpy Daniël de Kok 2024-07-23 15:52:28 +0000
56a695cdb9 New version. Nicolas Patry 2024-07-23 17:25:18 +0200
4ab4173767

Add support for Llama 3 rotary embeddings (#2286) Daniël de Kok 2024-07-23 17:18:54 +0200
e665bea857 Update transformers to 4.43 Daniël de Kok 2024-07-23 15:15:01 +0000
2c3b078911 Add support for Llama 3 rotary embeddings Daniël de Kok 2024-07-23 14:34:56 +0000
5d121a9705

Preparing for release. (#2285) Nicolas Patry 2024-07-23 16:20:17 +0200
fa470bc851

Fixing token within the docker image for the launcher. Nicolas Patry 2024-07-23 14:03:01 +0000
dc05d7ba23

Updating docs. Nicolas Patry 2024-07-23 13:42:32 +0000
4d980942de

Preparing for release. Nicolas Patry 2024-07-23 13:26:30 +0000
0c95f7a942 Debug softcap flash decoding activation debug/gemma2 Daniël de Kok 2024-07-23 13:12:19 +0000
3961e32390

[WIP] Add support for Mistral-Nemo by supporting head_dim through config (#2254) shaltielshmid 2024-07-23 16:00:07 +0300
32cc60f329

Shorter diff. Nicolas Patry 2024-07-23 12:59:35 +0000
ab62312d8c

Using head_dim as a fallback is necessary since it's a non standard key in mistralConfig (as defined in transformers). Nicolas Patry 2024-07-23 12:56:37 +0000
9935720c87

Add support for repacking AWQ weights for GPTQ-Marlin (#2278) Daniël de Kok 2024-07-23 13:08:20 +0200
f0a5cb6c4e Merge branch 'main' into add-mistral-nemo Shaltiel Shmidman 2024-07-23 12:44:56 +0300
712729bc78 Enable Marlin for supported AWQ configurations by default Daniël de Kok 2024-07-23 09:31:36 +0000
dee649c60c Chore: Fix naming issues regarding head_size, there can only be one. fix_mistral2 Nicolas Patry 2024-07-23 11:26:53 +0200
5fca30ee15

fix(l4): fix fp8 logic on l4 (#2277) OlivierDehaene 2024-07-23 09:24:29 +0000
abc32537ea

Fixing mistral nemo. (#2276) Nicolas Patry 2024-07-23 11:16:03 +0200
aa2cf4e8ee Using g6 instead of g5. Nicolas Patry 2024-07-23 11:07:35 +0200
33cb2cefed quick fix erikkaum 2024-07-23 11:04:04 +0200
025f80dfd4

use marlin even on 89 OlivierDehaene 2024-07-23 10:35:32 +0200
3c39ab5ac8 fix typo Morgan Funtowicz 2024-07-23 08:11:36 +0000
4c657ca158 make docker linter happy with same capitalization rule Morgan Funtowicz 2024-07-23 07:42:31 +0000
d9decb4c2c move to TensorRT-LLM v0.11.0 Morgan Funtowicz 2024-07-23 07:35:00 +0000
ff151b738b refactored docker image Morgan Funtowicz 2024-07-23 07:34:40 +0000
3db1be412c commenting out Python part for TensorRT installation Morgan Funtowicz 2024-07-23 07:27:34 +0000
10448ea8c9 added tgi to name of metric Edwinhr716 2024-07-22 20:38:07 +0000
4700465192

use proper name for ci (#2274) Adrien 2024-07-22 21:50:53 +0200
805e584b92 update tgi entrypoint Morgan Funtowicz 2024-07-22 19:13:01 +0000
85baa5da89 adding max_token_capacity_metric Edwinhr716 2024-07-22 18:21:34 +0000
32794b1caa Add support for repacking AWQ weights for GPTQ-Marlin Daniël de Kok 2024-07-22 17:39:38 +0000
473f968a01

also quant weights with single scale OlivierDehaene 2024-07-22 18:49:10 +0200
3d0c7b85fe

fix(l4): fix fp8 logic on l4 OlivierDehaene 2024-07-22 18:45:26 +0200
4d3936ea32

Fixing mistral nemo. Nicolas Patry 2024-07-22 16:36:19 +0000
6aeb669072

Softcapping for gemma2. (#2273) Nicolas Patry 2024-07-22 18:27:10 +0200
5266f15ae1

0.0 is the null value in the C++ API. Nicolas Patry 2024-07-22 15:59:09 +0000
4844ff790a

fix(server): fix fp8 weight loading (#2268) OlivierDehaene 2024-07-22 15:51:32 +0000
d0a34a95f2 adding missing ld_library_path for cuda stubs in Dockerfile Morgan Funtowicz 2024-07-22 15:16:39 +0000
3fd2bb70c3 fix missing / before tgi lib path Morgan Funtowicz 2024-07-22 14:57:03 +0000
a32ef3b875 correctly setup linking search path for runtime layer Morgan Funtowicz 2024-07-22 14:42:43 +0000
c813d64a90

missing group Adrien 2024-07-22 16:34:09 +0200
d2009e2262

use proper name for ci Adrien 2024-07-22 16:31:17 +0200
fd06ca6e7e add missing pkgconfig folder for MPI in Dockerfile Morgan Funtowicz 2024-07-22 14:19:51 +0000
40330c73f0 align all the linker search dependency Morgan Funtowicz 2024-07-22 14:14:57 +0000
6d8e3659a9

revert default dtype OlivierDehaene 2024-07-22 16:13:53 +0200
c4b78bd214

No access to transformers config, only config_dict here. Nicolas Patry 2024-07-22 13:54:17 +0000
5829b7821e

Less clutter. Nicolas Patry 2024-07-22 13:49:24 +0000
59022c22b4 fix: impove adapter merge comments and remove unused conditional drbh 2024-07-18 18:20:51 +0000
d27131bfa8 fix: improve logging and rebase syntax issue drbh 2024-07-15 20:40:39 +0000
5ec88a1b51 feat: improve weight loading and add tests drbh 2024-07-15 14:32:06 +0000
8c3530f705 fix: adjust launcher for local lora adapters drbh 2024-07-09 02:47:47 +0000
4b569341e6 feat: enable lora load from directory drbh 2024-07-09 02:38:50 +0000
70dc958fb8 fix: refactor adapter weight loading and mapping drbh 2024-07-05 15:00:36 +0000
620416f13f

Softcapping for gemma2. Nicolas Patry 2024-07-22 13:06:03 +0000
0d68619efa

update snap OlivierDehaene 2024-07-22 15:03:42 +0200
74f1f6a702

fixed scales loading OlivierDehaene 2024-07-22 13:56:12 +0200
119918cc0a

fix(server): fix fp8 weight loading OlivierDehaene 2024-07-21 20:56:54 +0200
6aebf44f47

fix(ci): test new instances (#2272) Adrien 2024-07-22 14:41:30 +0200
1c8b78ae99

improve build ci Adrien 2024-07-22 14:34:45 +0200
56e7f5d779

test new instances Adrien 2024-07-22 14:09:19 +0200
6a9e925ec1 fix bad copy/past missing nvinfer linkage direction Morgan Funtowicz 2024-07-22 11:43:10 +0000
3597beefe2 leverage pkg-config to probe libraries paths and reuse new install structure from cmake Morgan Funtowicz 2024-07-22 11:39:11 +0000
2aac2ff2cd do the same name definition stuff for tensorrt_llm_executor_static Morgan Funtowicz 2024-07-22 11:32:54 +0000
da079df4cd simplify prebuilt trtllm libraries name definition Morgan Funtowicz 2024-07-22 11:32:31 +0000
07441f5a7a

legacy warning on text_generation client (#2271) Erik Kaunismäki 2024-07-22 12:00:17 +0200