text-generation-inference

huggingface/text-generation-inference

Fork 0

mirror of https://github.com/huggingface/text-generation-inference.git synced 2025-06-19 15:52:08 +00:00

Commit Graph

Select branches

Hide Pull Requests

add-chat-response-format

add-google-cloud-provider

add-quickstart-script

add-rotary-embed-tests

add-small-ttft-script

add-test-for-warmup-and-kvcache

add_L4

add_api_key

add_batch_dimension

add_chunked_atn

add_chunked_attn

add_deepseekv3

add_gptq_docs

add_integration_test

add_readme_dashboard

add_tunable_prefill

add_vlm_chunking

adding_docs

adjust-mllama-test-output

adjust-where-request-max-tokens-is-defaulted

aiter_kernels

amd-ci-fx

auto_length

automodel-supports-flash-paged-attention

avoid-cuda-graph-during-warmup-if-oom

avoid-zero-seed

backends/trtllm

backends/trtllm-executor

baichuan2-13b

bnb4

bugfix/add_tools_prompt

bugfix/moe-kernels-imports

bugfix/phi-exl2

bump-client-0.6.2

bump-kernel-versions

bump-poetry-and-requirements

chunked_attn_l4

ci-amihalik-update-chat-completion-messages

ci-new-cluster

ci-patch

ci-run-openai-function-calling-compatible-support

ci-update_xpu_image

ci-xpu

ci-xpu2

ci2

ci_amd

ci_amd2

ci_amd3

ci_amd4

close_dl_thread

compat_logger

cuda_ipc_allreduce

debug-gpt2

debug-request-id

debug-torch-23

debug/gemma2

debugging-timeouts

deploy/aml

dev

development-guide

dummy

enable-non-grammar-constrained-tools

enable-qwen2vl-video

enable-transformers-vlm

enable_non_divisible_embeddings

exl2

experiment/moe

explore-static-triton-kernels

explore-t4-gemma-issues

feat-backend-llamacpp

feat/add-load-test

feat/attention_sinks

feat/backend_abstraction

feat/backend_feature

feat/better_tokens

feat/cuda_12

feat/flash_decoding

feat/improve_max_tokens

feat/max_queue_size

feat/page_re_alloc

feat/parse_logs

feat/support_deepspeed

feature/machete

feature/moe-kernels

feature/no_repeat_ngram_size

feature/no_repeat_ngram_size_ci

feature/phi-3-small

feature/prefix

feature/radix-prefix-cache

feature/radix-prefix-cache-bench

feature/vlm-prefix-caching

fix-cudagraph-bug

fix-gemma-tokenization

fix-grammar-cleanup-bug

fix-grammar-fsm-batching

fix-mixtral-adapter-loading

fix-release-tests

fix-repack-for-marlin

fix-tp

fix-version-install

fix/allow-top-p-0

fix/avoid_record_streams

fix/op-trace-id

fix/parse-mamba-config

fix_default_arg

fix_exl2

fix_fp8_llama3.2

fix_leak

fix_mistral2

fix_neox_rotary_emb

fix_phi3

flashinfer

flashinfer-0.2.5

fp8_kvcache

fp8_rocm

gaudi/add-ci

gaudi_llama4_tmp

gha_sccache_use_secrets

git_2.0.4

git_3.1.1

git_3.2.0

git_3.2.1

git_3.3.0

git_3.3.1

git_3.3.2

git_v2.1.0

git_v2.1.1

git_v2.2.0

git_v2.3.0

git_v2.3.1

git_v2.4.0

git_v2.4.1

git_v3.0.0

git_v3.0.1

git_v3.0.2

git_v3.1.0

git_v3.2.2

git_v3.2.3

git_v3.3.3

git_v3.3.4

improve-docs

improve-dynamic-message-content

improve-tool-call-and-response-ids

improve_defaults

improve_launcher_defaults

inlcude-latest-release-on-commit-builds-tags

ipex-moe

kvrouter

kvrouter-endpoints

llama-fused-compiled-mlp

main

maintenance/docker-network

maintenance/merge-vlm-input-prep

mamba2

martinigoyanes-fix-frequency-penalty

medusa

megatron

message-more-info

mi300-temp

mllama

model_compat_log

more_logs

multi-lora

new_minor_version

nix/cargo-clippy

nix/docker2

nix/pytorch-2.5.1

nix_integration_tests

nix_test2

no_root_user

no_root_user2

op-compilation-benchmarking

origin/slind_window_fix

osanseviero-patch-1

pip-installable

pr-1869-ci-run

pr-2076-ci-run

pr-2290-ci-runner

pr-2366-ci-branch

pr-2444-ci-branch

pr-2517-ci-branch

pr-2711-ci-branch

pr-2784-ci-branch

pr-2840-ci-branch

pr-2954-ci-branch

pr-3002-ci-branch

pr-3004-ci-branch

pr-3018-ci-branch

precompile-kernels-workflow

prefix_chunk

prefix_default

proxy_sse_engine_state

quantization

refactor-lora-linear

release-3.2.4

remove_post_load_weights

response-header-metrics

revert

rocm-ci-build

rocm_6.2_fixes

router-grammar-compile

s3-cache

self-generating-docs

set-num-blocks

simpler_exllama

skip-mistral-test

speculative

streaming_conceptual

support-granite-vision

support-logit-bias-in-chat

support-phi-model

support-phi3-small

support-pre-compile-kernels

temp_work

test-batch-speedup-amount

test_docs

test_rocm

tmp_invariants

tmp_medusa

tmp_torch_compile

triton_fix

trtllm-stop-words

trtllm/executor_stats

tuna

update-jsonschema

update_docs2

update_internal_version

update_peft

update_readme

upgrade-outlines

upgrade_mlp_speculator

use_g6

use_updated_kernels

vllm/setup

zstd

#1

#100

#101

#1010

#1018

#1019

#102

#1022

#1023

#1024

#103

#1033

#1034

#1042

#1044

#1045

#1048

#1049

#1052

#1054

#1058

#1059

#106

#1060

#1061

#1063

#1064

#1065

#1066

#1068

#107

#1070

#1071

#1075

#1076

#1077

#108

#1080

#1081

#1089

#109

#1090

#1091

#1092

#1094

#1096

#1097

#1099

#11

#110

#1100

#1101

#1102

#1103

#1105

#1110

#1112

#1116

#1123

#1128

#1134

#114

#1140

#1141

#115

#1153

#1155

#116

#1165

#1165

#117

#1173

#1176

#1178

#1179

#118

#1182

#1183

#1184

#1187

#119

#1198

#1202

#1211

#1214

#1219

#122

#1224

#1228

#123

#1239

#1241

#1242

#1243

#1246

#1252

#126

#1260

#1267

#1270

#1272

#1274

#1276

#1279

#128

#1285

#1287

#129

#1294

#1295

#13

#130

#1301

#1305

#1307

#1308

#1313

#132

#1326

#1328

#133

#1336

#1337

#134

#1341

#1343

#1346

#1347

#1348

#135

#1351

#1352

#1353

#1358

#136

#1361

#1364

#137

#1370

#1373

#138

#1381

#1386

#139

#1390

#1395

#14

#140

#1408

#141

#1414

#1419

#142

#1420

#1424

#1425

#1427

#1428

#143

#1436

#144

#1442

#1448

#145

#1450

#1453

#1454

#1455

#1459

#1461

#1462

#1463

#1469

#147

#1470

#1471

#1473

#1475

#1476

#1477

#1478

#148

#1480

#1484

#1486

#1488

#1489

#149

#1490

#1491

#1492

#1494

#1495

#1496

#1497

#1498

#15

#150

#1502

#1504

#1505

#1506

#151

#1511

#1512

#1514

#1515

#1516

#1517

#1518

#152

#1520

#1523

#1524

#1526

#1527

#153

#1532

#1533

#1537

#1539

#154

#1540

#1541

#1542

#1543

#155

#1550

#1552

#1555

#1556

#1557

#1560

#1563

#1564

#1567

#1568

#1569

#1570

#1571

#1576

#1577

#1578

#1579

#1580

#1583

#1584

#1585

#1586

#1587

#1588

#159

#1591

#1592

#1594

#16

#160

#1603

#1605

#1606

#1607

#1608

#1609

#161

#1610

#1614

#1617

#1618

#1619

#162

#1621

#1626

#1628

#163

#1632

#1637

#1638

#1639

#164

#1646

#1648

#1650

#1651

#1653

#1658

#1660

#1662

#1663

#1664

#1666

#1667

#1668

#167

#1676

#168

#1682

#1685

#1686

#1693

#1697

#1698

#17

#170

#1702

#1703

#1704

#1707

#1708

#1709

#1710

#1713

#1714

#1715

#1716

#1718

#1719

#1726

#1727

#1729

#173

#1730

#1731

#1734

#1735

#1736

#1737

#1739

#174

#1740

#1747

#1748

#1749

#175

#1751

#1755

#1756

#1758

#1759

#1760

#1764

#1765

#1767

#1768

#1773

#1774

#1775

#178

#1784

#1789

#179

#1790

#1791

#1797

#1798

#18

#180

#1800

#1801

#1808

#181

#1811

#1812

#1813

#1815

#1816

#1817

#1818

#1820

#1825

#1827

#1828

#1829

#183

#1830

#1832

#1833

#1835

#1836

#1839

#184

#1840

#1841

#1843

#1844

#1845

#1848

#1849

#185

#1850

#1851

#1854

#1855

#186

#1860

#1865

#1866

#1869

#187

#1882

#1884

#1886

#1888

#1889

#1890

#1892

#1894

#1895

#1898

#19

#190

#1902

#1906

#1908

#1909

#191

#1910

#1912

#1915

#1916

#1917

#1918

#1919

#1920

#1921

#1923

#1924

#1925

#1929

#193

#1931

#1932

#1934

#1935

#1936

#1937

#1938

#1939

#194

#1940

#1942

#1947

#1948

#1949

#1950

#1951

#1953

#1954

#1958

#1959

#196

#1963

#1965

#1967

#1970

#1971

#1975

#1980

#1981

#1985

#1986

#1988

#1989

#1990

#1994

#1995

#1996

#2

#20

#200

#2002

#2003

#2004

#2005

#2006

#2008

#201

#2010

#2011

#2013

#2014

#2015

#2016

#2017

#202

#2020

#2021

#2022

#2023

#2024

#2028

#203

#2031

#2032

#2033

#2034

#2038

#2039

#2044

#2045

#2046

#2047

#2049

#205

#2050

#2052

#2054

#2059

#2060

#2061

#2062

#2063

#2065

#2066

#2068

#207

#2071

#2072

#2074

#2075

#2076

#2078

#2079

#208

#2080

#2084

#2085

#2086

#2087

#2088

#2089

#2091

#2092

#2094

#2095

#2097

#2099

#210

#2101

#2102

#2103

#2104

#2105

#2109

#2110

#2111

#2114

#2115

#2116

#2117

#2118

#2119

#212

#2120

#2123

#2124

#2125

#2127

#2128

#2129

#213

#2131

#2132

#2133

#2134

#2135

#2137

#2138

#214

#2140

#2141

#2142

#2148

#2149

#215

#2151

#2152

#2153

#2155

#2156

#2158

#216

#2161

#2163

#2164

#2165

#2166

#2167

#2168

#2169

#217

#2170

#2173

#2175

#2176

#2178

#2179

#218

#2180

#2181

#2182

#2186

#2187

#2189

#219

#2190

#2191

#2193

#2194

#2196

#22

#220

#2201

#2202

#2203

#2204

#2205

#2208

#2209

#221

#2212

#2213

#2215

#2216

#2217

#222

#2220

#2221

#2224

#2225

#2226

#2228

#2230

#2231

#2232

#2233

#2237

#2242

#2243

#2244

#2245

#2248

#2249

#2250

#2251

#2254

#2255

#2256

#2257

#2258

#2259

#226

#2260

#2261

#2262

#2266

#2268

#2269

#227

#2271

#2272

#2273

#2274

#2276

#2277

#2278

#2279

#228

#2281

#2282

#2283

#2284

#2285

#2286

#2287

#2288

#2289

#2290

#2291

#2292

#2298

#2299

#23

#2300

#2303

#2304

#2306

#2307

#2308

#2309

#2311

#2311

#2313

#2314

#2315

#2317

#2320

#2323

#2325

#2326

#2327

#2328

#2329

#233

#2330

#2331

#2333

#2335

#2336

#2337

#2338

#2339

#234

#2341

#2342

#2343

#2344

#2345

#2346

#2347

#235

#2350

#2351

#2352

#2353

#2354

#2357

#2358

#2359

#2360

#2361

#2364

#2365

#2366

#2367

#2368

#237

#2370

#2371

#2372

#2374

#2377

#2378

#2379

#2381

#2382

#2384

#2385

#2386

#2387

#2389

#2390

#2391

#2392

#2394

#2395

#2396

#2397

#2398

#2399

#24

#2401

#2402

#2403

#2404

#2405

#2406

#2407

#2408

#2410

#2411

#2412

#2414

#2415

#2416

#2419

#242

#2420

#2422

#2423

#2424

#2426

#2427

#2428

#2429

#2430

#2431

#2433

#2437

#2438

#2439

#244

#2442

#2443

#2444

#2449

#2450

#2451

#2453

#2454

#2455

#2459

#246

#2462

#2463

#2468

#2469

#2470

#2471

#2472

#2473

#2477

#2478

#2479

#248

#2481

#2482

#2484

#2486

#2489

#2490

#2491

#2492

#2493

#2494

#2496

#2497

#2498

#2499

#25

#250

#2500

#2501

#2507

#251

#2510

#2511

#2512

#2513

#2514

#2515

#2516

#2517

#2518

#2519

#252

#2520

#2521

#2524

#2525

#2527

#2528

#2529

#2532

#2533

#2535

#2536

#2537

#2538

#2539

#2540

#2545

#2546

#2547

#2548

#255

#2550

#2551

#2552

#2553

#2554

#2555

#2556

#2557

#2558

#2561

#2562

#2563

#2566

#2567

#2568

#257

#2574

#2575

#2577

#2578

#2579

#258

#2580

#2582

#2585

#2586

#2587

#2588

#2589

#259

#2590

#2591

#2592

#2594

#2595

#2596

#2597

#2599

#26

#2600

#2601

#2602

#2603

#2604

#2605

#2606

#2607

#2609

#261

#2610

#2611

#2612

#2614

#2616

#2617

#2619

#262

#2620

#2622

#2623

#2625

#2627

#2628

#2629

#2630

#2631

#2632

#2633

#2634

#2637

#264

#2640

#2642

#2642

#2645

#2646

#2647

#2648

#2650

#2651

#2652

#2655

#2658

#2659

#266

#2661

#2663

#2664

#2665

#2666

#2668

#267

#2673

#2674

#2677

#2678

#2680

#2682

#2683

#2684

#2685

#2686

#2687

#2688

#2689

#269

#2690

#2691

#2692

#2693

#2694

#2695

#2697

#2698

#2699

#27

#2701

#2702

#2704

#2706

#2707

#2708

#2709

#2710

#2711

#2712

#2713

#2714

#2716

#2717

#2718

#2719

#272

#2720

#2721

#2723

#2724

#2725

#2726

#2732

#2733

#2738

#274

#2740

#2741

#2742

#2743

#2745

#2746

#275

#2750

#2751

#2753

#2754

#2755

#2756

#2756

#2758

#276

#2760

#2761

#2762

#2764

#2765

#2766

#2767

#2768

#2769

#277

#2770

#2771

#2772

#2773

#2774

#2777

#2778

#2779

#278

#2782

#2784

#2785

#2786

#2788

#2789

#2790

#2791

#2792

#2793

#2795

#2796

#2797

#2798

#2799

#28

#2802

#2803

#2806

#2807

#2808

#2809

#2810

#2811

#2812

#2814

#2815

#2816

#2817

#2818

#282

#2824

#2825

#2826

#2827

#2829

#2831

#2833

#2834

#2837

#284

#2840

#2841

#2845

#2846

#2848

#2848

#2849

#285

#2850

#2852

#2855

#2856

#2858

#2859

#286

#2861

#2862

#2863

#2866

#2866

#287

#2870

#2874

#2874

#2878

#2882

#2883

#2884

#2885

#2886

#2891

#2892

#2893

#2898

#29

#2901

#2903

#2904

#2905

#2907

#2908

#2910

#2911

#2912

#2913

#2916

#2917

#2918

#2919

#292

#2921

#2922

#2924

#2928

#2929

#2931

#2932

#2933

#2935

#2936

#2936

#2937

#2938

#294

#2940

#2941

#2942

#2943

#2945

#2947

#2948

#2949

#2950

#2951

#2953

#2954

#2957

#2958

#2960

#2961

#2962

#2963

#2964

#2964

#2965

#2965

#2966

#2967

#2968

#297

#2970

#2971

#2972

#2975

#2976

#2976

#2977

#2979

#298

#2980

#2981

#2982

#2983

#2988

#299

#2990

#2991

#2992

#2993

#2995

#2996

#2998

#2999

#30

#3000

#3001

#3002

#3003

#3004

#3006

#3007

#3008

#3010

#3012

#3012

#3013

#3015

#3016

#3017

#3018

#302

#3021

#3022

#3024

#3027

#3028

#3029

#303

#3030

#3031

#3032

#3033

#3034

#3034

#3035

#3036

#3037

#3039

#304

#3040

#3041

#3042

#3044

#3045

#3046

#3047

#3048

#3049

#305

#3051

#3053

#3054

#3055

#3056

#3057

#3060

#3061

#3063

#3063

#3064

#3064

#3065

#3065

#3067

#3068

#3069

#3072

#3073

#3074

#3075

#3076

#3077

#3078

#3079

#3079

#308

#3080

#3081

#3083

#3084

#3085

#3086

#3089

#3091

#3092

#3093

#3094

#3095

#3096

#3098

#3099

#31

#310

#3100

#3101

#3103

#3106

#3107

#3108

#3109

#3111

#3111

#3112

#3113

#3116

#3117

#3118

#3120

#3121

#3122

#3124

#3125

#3126

#3127

#3128

#3129

#313

#3131

#3132

#3134

#3136

#3141

#3142

#3144

#3145

#3147

#3148

#3150

#3151

#3152

#3154

#3154

#3156

#3157

#3160

#3160

#3161

#3162

#3162

#3164

#3164

#3166

#3166

#3167

#317

#3170

#3171

#3172

#3174

#3176

#3179

#318

#3182

#3184

#3186

#3186

#3187

#3188

#3189

#3193

#3194

#3194

#3196

#3197

#3197

#32

#3200

#3201

#3201

#3202

#3204

#3206

#3207

#3210

#3210

#3211

#3217

#3218

#3220

#3221

#3222

#3223

#3224

#3229

#3230

#3231

#3231

#3232

#3232

#3234

#3235

#3236

#3237

#3238

#3239

#3240

#3242

#3244

#3245

#3246

#3249

#325

#3252

#3252

#3253

#3254

#3255

#3255

#3256

#3258

#3258

#3260

#3261

#3262

#3262

#3263

#3264

#3265

#3266

#3266

#3267

#3267

#3268

#3269

#327

#3270

#3271

#3273

#328

#329

#33

#334

#335

#336

#34

#340

#341

#343

#344

#348

#35

#351

#352

#353

#356

#357

#358

#359

#36

#360

#362

#363

#364

#367

#368

#37

#370

#373

#379

#384

#385

#388

#39

#393

#394

#395

#396

#4

#40

#400

#404

#406

#407

#41

#411

#412

#42

#434

#438

#44

#441

#443

#45

#453

#46

#462

#465

#47

#470

#472

#475

#477

#48

#480

#483

#485

#488

#49

#498

#5

#50

#501

#502

#51

#513

#514

#516

#519

#52

#520

#521

#522

#525

#529

#53

#534

#54

#543

#544

#545

#55

#550

#553

#557

#558

#56

#561

#562

#567

#57

#575

#578

#579

#58

#580

#581

#582

#583

#585

#586

#587

#588

#59

#590

#595

#596

#6

#60

#600

#605

#608

#609

#61

#611

#616

#617

#618

#619

#62

#621

#623

#624

#626

#63

#630

#633

#634

#635

#639

#64

#642

#643

#647

#648

#659

#66

#661

#664

#665

#666

#67

#670

#671

#678

#68

#684

#689

#698

#7

#70

#704

#708

#71

#712

#713

#715

#719

#72

#721

#723

#725

#727

#73

#733

#737

#738

#740

#741

#743

#745

#746

#748

#75

#750

#76

#761

#762

#767

#768

#770

#773

#783

#785

#789

#791

#793

#794

#795

#797

#798

#799

#8

#803

#805

#806

#809

#810

#812

#82

#820

#821

#822

#823

#829

#831

#836

#838

#84

#842

#848

#85

#851

#852

#853

#854

#858

#86

#860

#862

#867

#868

#87

#872

#88

#881

#884

#886

#889

#89

#892

#893

#898

#9

#90

#900

#901

#905

#906

#91

#910

#911

#918

#921

#93

#930

#932

#935

#94

#941

#947

#95

#950

#951

#953

#954

#957

#958

#96

#963

#964

#966

#968

#97

#971

#977

#981

#986

#989

#990

#993

#994

#999

v0.2.0

v0.2.1

v0.3.0

v0.3.1

v0.3.2

v0.4.0

v0.4.1

v0.4.2

v0.4.3

v0.5.0

v0.6.0

v0.7.0

v0.8.0

v0.8.1

v0.8.2

v0.9.0

v0.9.1

v0.9.2

v0.9.3

v0.9.4

v1.0.0

v1.0.1

v1.0.2

v1.0.3

v1.1.0

v1.1.1

v1.2.0

v1.3.0

v1.3.1

v1.3.2

v1.3.3

v1.3.4

v1.4.0

v1.4.1

v1.4.2

v1.4.3

v1.4.4

v1.4.5

v2.0.0

v2.0.1

v2.0.2

v2.0.3

v2.0.4

v2.1.0

v2.1.1

v2.2.0

v2.3.0

v2.3.1

v2.4.0

v2.4.1

v3.0.0

v3.0.1

v3.0.2

v3.1.0

v3.1.1

v3.2.0

v3.2.1

v3.2.2

v3.2.3

v3.3.0

v3.3.1

v3.3.2

v3.3.3

v3.3.4

3b1b049b32

Enable KQV offload by default Adrien Gallouët 2025-02-06 18:33:30 +0000
a68aefa86d

Intel extension fix. Nicolas Patry 2025-02-06 19:26:57 +0100
072082774e

Attempt to fix intel CPU. Nicolas Patry 2025-02-06 16:53:40 +0100
dc2e3e5ded

Forgot the integration snapshot. Nicolas Patry 2025-02-06 16:47:29 +0100
856709d5c3

[Backend] Bump TRTLLM to v.0.17.0 (#2991) Funtowicz Morgan 2025-02-06 16:45:03 +0100
809e288b5a

Fix fmt Adrien Gallouët 2025-02-06 14:58:44 +0000
5367d94f34

Fix requirements.txt Adrien Gallouët 2025-02-06 14:45:55 +0000
595f2b6fce

Town instead village. Nicolas Patry 2025-02-06 15:28:53 +0100
df723e646b

Bump llama.cpp & cuda Adrien Gallouët 2025-02-06 13:24:36 +0000
7bff88bba9

Do not use HOSTNAME env Adrien Gallouët 2025-02-06 13:17:17 +0000
a91127c24b

Updating mllama after strftime. Nicolas Patry 2025-02-05 12:11:18 +0100
36223f834e

Triton fix (#2995) Wang, Yi 2025-02-06 19:28:41 +0800
0ef8c8a97a

Using the "lockfile". (#2992) Nicolas Patry 2025-02-06 12:28:24 +0100
8bc10d37ee

Update docs Adrien Gallouët 2025-02-06 10:31:05 +0000
2b0d99c1cf

Thanks cargo fmt Adrien Gallouët 2025-02-06 10:08:18 +0000
fb81c0d1c4

Thanks clippy Adrien Gallouët 2025-02-06 10:53:57 +0100
e4d5fa7eaf

Update docs Adrien Gallouët 2025-02-06 09:46:24 +0000
408663e61a fix triton to 3.1.0 to fix ipex import issue triton_fix Wang, Yi A 2025-02-06 00:51:17 -0800
8a3c9fb79a

Applying to other builds. Nicolas Patry 2025-02-06 09:46:25 +0100
393a7efc9e backend(trtllm): link against CUDA 12.8 Morgan Funtowicz 2025-02-06 09:38:06 +0100
a3b05fc943

Mode max. Nicolas Patry 2025-02-06 01:49:24 +0100
2f7b023f39

Don't break all other builds. Nicolas Patry 2025-02-06 01:13:31 +0100
c8b0eddf79

Attempt #42 Nicolas Patry 2025-02-06 00:57:22 +0100
3514d2dc8c

.. Nicolas Patry 2025-02-05 23:14:51 +0100
0d382c4508

. Nicolas Patry 2025-02-05 23:11:01 +0100
d5fc0577b8 backend(trtllm): make sure we escalade all warnings as errors on the backend impl in debug mode Morgan Funtowicz 2025-02-05 23:01:09 +0100
117d27849c backend(trtllm): use return value optimization flag as as error if available Morgan Funtowicz 2025-02-05 23:00:42 +0100
1641c22af8

Add doc Adrien Gallouët 2025-02-05 21:14:30 +0000
7f00c37905 backend(trtllm): build against gcc-14 with cuda12.8 Morgan Funtowicz 2025-02-05 22:05:46 +0100
e3326e6b0b

We need the launcher still. Nicolas Patry 2025-02-05 20:58:10 +0100
b3e40c4b66

Improve default settings Adrien Gallouët 2025-02-05 16:38:52 +0000
9258fa6a24

How in the world... Nicolas Patry 2025-02-05 17:38:41 +0100
f22e2fb550

Cleanup Adrien Gallouët 2025-02-05 16:12:34 +0000
19ea893956

The good old monkey. Nicolas Patry 2025-02-05 16:53:36 +0100
0f62401b8e

Initialize penalty_last_n with llamacpp default value Adrien Gallouët 2025-02-05 15:44:46 +0000
695b1292e9

Ensure all samplers are freed on error Adrien Gallouët 2025-02-05 15:42:59 +0000
07c0080970 fix: add transformer overlay for processor support drbh 2025-02-05 15:42:22 +0000
830c25dd5a

Bad cache hits. Nicolas Patry 2025-02-05 16:21:57 +0100
d299b52cb5 backend(trtllm): link against decoder_attention_{0|1} Morgan Funtowicz 2025-02-05 16:15:31 +0100
5caf5401ff

.. Nicolas Patry 2025-02-05 15:38:53 +0100
11c9acab42 backend(trtllm): use correct library reference decoder_attention_src Morgan Funtowicz 2025-02-05 15:33:36 +0100
027931d262

Another attempt. Nicolas Patry 2025-02-05 15:31:50 +0100
5b777877b1

Make max_batch_total_tokens optional Adrien Gallouët 2025-02-05 11:40:20 +0000
09a745f1b8

Remove n_ctx Adrien Gallouët 2025-02-05 11:31:58 +0000
76c458a8a2

Lock on python 3.11 Nicolas Patry 2025-02-05 12:28:19 +0100
051ff2d5ce

Rename bindings Adrien Gallouët 2025-02-05 11:13:17 +0000
c52f08351f

Set TGI_LLAMA_PKG_CUDA from CUDA_VERSION Adrien Gallouët 2025-02-05 10:57:50 +0000
a1c78adc19

Revert dummy modifications. Nicolas Patry 2025-02-05 11:55:17 +0100
951eb62b56

Using the "lockfile". Nicolas Patry 2025-02-05 11:48:40 +0100
dbee804129

Simplify batching logic Adrien Gallouët 2025-02-05 10:12:39 +0000
d3a772a8dd

Update args Adrien Gallouët 2025-02-05 10:10:38 +0000
9f6f1e905d backend(trtllm): use arg instead of env Morgan Funtowicz 2025-02-05 10:30:02 +0100
4c44de4ee7 backend(trtllm): forget to bump dockerfile Morgan Funtowicz 2025-02-05 10:27:47 +0100
6168ffc23f backend(trtllm): bump TRTLLM to v.0.17.0 Morgan Funtowicz 2025-02-05 10:14:20 +0100
c837843264

Merge 4e1c68e6f8 into c1cf36c0dc Funtowicz Morgan 2025-02-05 09:24:02 +0100
76d526d931 feat: check before rope type adjustment and small refactors drbh 2025-02-05 02:27:29 +0000
1f585775b8 fix: bump support models doc drbh 2025-01-31 12:47:04 -0500
10aa62f87f feat: support qwen2.5 vl model drbh 2025-01-31 12:36:03 -0500
e007529590

Update Cargo.lock Adrien Gallouët 2025-02-04 17:54:53 +0000
906c265aef

Cleanup Dockerfile Adrien Gallouët 2025-02-04 17:53:47 +0000
c1cf36c0dc

Improve qwen vl impl (#2943) drbh 2025-02-04 12:44:18 -0500
dd2bd5fdb3

impureWithCuda: fix gcc version (#2990) Daniël de Kok 2025-02-04 17:01:59 +0100
df2a4fbb8a

Update Dockerfile_llamacpp Adrien Gallouët 2025-02-04 12:34:02 +0000
d883109df6

Disable graceful shutdown in debug mode Adrien Gallouët 2025-02-03 20:58:33 +0000
207041a977

Bump llamacpp to b4623 Adrien Gallouët 2025-02-03 13:38:42 +0000
38b33e9698

Add --type-v & --type-k Adrien Gallouët 2025-02-03 12:39:28 +0000
bfb8e03e9f

Add specific args for batch Adrien Gallouët 2025-02-03 11:03:47 +0000
e6a8d33902

backend(llama): add CUDA architectures build argument for Dockerfile Morgan Funtowicz 2025-02-03 11:36:44 +0100
ea28332bb3

Cleanup Adrien Gallouët 2025-02-01 20:40:59 +0000
104a968d01

Remove warmup Adrien Gallouët 2025-02-01 20:27:31 +0000
8ed362d03a

Clear request cache after completion Adrien Gallouët 2025-02-01 20:20:43 +0000
c8505fb300

Auto-detect n_threads when not provided Adrien Gallouët 2025-02-01 18:33:26 +0000
27534d8ee4

Fix seq iterations Adrien Gallouët 2025-02-01 17:55:00 +0000
96434a1e7e

Fix batching Adrien Gallouët 2025-02-01 16:09:51 +0000
2a51e415ff

Output real logprobs Adrien Gallouët 2025-02-01 11:37:14 +0000
161280f313

Only export the latest logits Adrien Gallouët 2025-02-01 10:51:44 +0000
960c12bd6e

backend(llama): add CUDA Dockerfile_llamacpp for now Morgan Funtowicz 2025-01-31 22:13:59 +0100
f38c34aeb7

Fix batch_pos Adrien Gallouët 2025-01-31 18:20:45 +0000
e88a527fcf

Add --offload-kqv Adrien Gallouët 2025-01-31 16:23:22 +0000
ae5bb789c2

Enable flash attention by default Adrien Gallouët 2025-01-31 16:07:10 +0000
3f199134f0

Fix args Adrien Gallouët 2025-01-31 15:51:28 +0000
7a3ed4171e

Add --numa Adrien Gallouët 2025-01-31 15:09:29 +0000
390f0ec061

Cleanup Adrien Gallouët 2025-01-31 15:00:23 +0000
d6ded897a8

Add a stupid batch mechanism Adrien Gallouët 2025-01-31 12:44:09 +0000
e07835c5b5

Add --defrag-threshold Adrien Gallouët 2025-01-31 10:38:34 +0000
f388747985

Add GPU args Adrien Gallouët 2025-01-31 09:50:57 +0000
8d2dfdf668

Handle ctx args & fix sampling Adrien Gallouët 2025-01-30 22:41:26 +0000
a7b4b04cb5

Add some input validation checks Adrien Gallouët 2025-01-30 20:21:37 +0000
e7facf692f

Handle max_batch_size Adrien Gallouët 2025-01-30 19:50:09 +0000
3eb4823f3e

Use max_batch_total_tokens Adrien Gallouët 2025-01-30 15:12:55 +0000
bd0cc9905c

Get rid of llama_batch_get_one() Adrien Gallouët 2025-01-30 13:41:35 +0000
95e221eece

Add llamacpp backend Adrien Gallouët 2025-01-24 09:05:37 +0000
b3436da43d trufflehog: do not fail on unverified results Daniël de Kok 2025-02-04 12:23:54 +0000
06abe3f50e impureWithCuda: fix gcc version Daniël de Kok 2025-02-04 10:24:33 +0000
cfc6cbc4d6

fix: Functioncall is actually a bit different than the deprecated function definition type Nicolas Casademont 2025-02-04 11:09:55 +0100
45d3a3a253

fix: Allow back arguments in function definition and the corresponding test Nicolas Casademont 2025-02-04 11:07:42 +0100
58f5f2ee27 fix: adjust signatures with types drbh 2025-02-04 00:30:47 +0000
6cb0cb68b4 fix: improve and simplify get_cos_sin, refactors and cleanup get_position_ids drbh 2025-02-04 00:25:59 +0000
88fd56f549

Add strftime_now callable function for minijinja chat templates (#2983) Alvaro Bartolome 2025-02-03 15:30:48 +0100
8ae92e5d70

Merge branch 'huggingface:main' into fix/dockerfile-triton Yaser Jaradeh 2025-02-03 11:48:01 +0100