text-generation-inference

huggingface/text-generation-inference

Fork 0

mirror of https://github.com/huggingface/text-generation-inference.git synced 2025-09-11 20:34:54 +00:00

Commit Graph

Select branches

Hide Pull Requests

20250708-ci-fixes

add-chat-response-format

add-google-cloud-provider

add-quickstart-script

add-rotary-embed-tests

add-small-ttft-script

add-test-for-warmup-and-kvcache

add_L4

add_api_key

add_batch_dimension

add_chunked_atn

add_chunked_attn

add_deepseekv3

add_gptq_docs

add_integration_test

add_readme_dashboard

add_tunable_prefill

add_vlm_chunking

adding_docs

adjust-mllama-test-output

adjust-where-request-max-tokens-is-defaulted

aiter_kernels

amd-ci-fx

auto_length

automodel-supports-flash-paged-attention

avoid-cuda-graph-during-warmup-if-oom

avoid-zero-seed

backends/trtllm

backends/trtllm-executor

baichuan2-13b

bnb4

bugfix/add_tools_prompt

bugfix/moe-kernels-imports

bugfix/phi-exl2

bump-client-0.6.2

bump-kernel-versions

bump-poetry-and-requirements

chunked_attn_l4

ci-amihalik-update-chat-completion-messages

ci-new-cluster

ci-patch

ci-run-openai-function-calling-compatible-support

ci-update_xpu_image

ci-xpu

ci-xpu2

ci2

ci_amd

ci_amd2

ci_amd3

ci_amd4

close_dl_thread

compat_logger

cuda_ipc_allreduce

debug-gpt2

debug-request-id

debug-torch-23

debug/gemma2

debugging-timeouts

deploy/aml

dev

development-guide

dummy

enable-non-grammar-constrained-tools

enable-qwen2vl-video

enable-transformers-vlm

enable_non_divisible_embeddings

exl2

experiment/moe

explore-static-triton-kernels

explore-t4-gemma-issues

feat-backend-llamacpp

feat/add-load-test

feat/attention_sinks

feat/backend_abstraction

feat/backend_feature

feat/better_tokens

feat/cuda_12

feat/flash_decoding

feat/improve_max_tokens

feat/max_queue_size

feat/page_re_alloc

feat/parse_logs

feat/support_deepspeed

feature/machete

feature/moe-kernels

feature/no_repeat_ngram_size

feature/no_repeat_ngram_size_ci

feature/phi-3-small

feature/prefix

feature/radix-prefix-cache

feature/radix-prefix-cache-bench

feature/vlm-prefix-caching

fix-cudagraph-bug

fix-gemma-tokenization

fix-grammar-cleanup-bug

fix-grammar-fsm-batching

fix-mixtral-adapter-loading

fix-release-tests

fix-repack-for-marlin

fix-tool-call-def

fix-tp

fix-version-install

fix/allow-top-p-0

fix/avoid_record_streams

fix/op-trace-id

fix/parse-mamba-config

fix_default_arg

fix_exl2

fix_fp8_llama3.2

fix_leak

fix_mistral2

fix_neox_rotary_emb

fix_phi3

flashinfer

flashinfer-0.2.5

fp8_kvcache

fp8_rocm

gaudi_llama4_tmp

gha_sccache_use_secrets

git_2.0.4

git_3.1.1

git_3.2.0

git_3.2.1

git_3.3.0

git_3.3.1

git_3.3.2

git_v2.1.0

git_v2.1.1

git_v2.2.0

git_v2.3.0

git_v2.3.1

git_v2.4.0

git_v2.4.1

git_v3.0.0

git_v3.0.1

git_v3.0.2

git_v3.1.0

git_v3.2.2

git_v3.2.3

git_v3.3.3

git_v3.3.4

git_v3.3.5

improve-docs

improve-dynamic-message-content

improve-json-schema-field

improve-tool-call-and-response-ids

improve_defaults

improve_launcher_defaults

inlcude-latest-release-on-commit-builds-tags

ipex-moe

kvrouter

kvrouter-endpoints

llama-fused-compiled-mlp

main

maintenance/docker-network

maintenance/merge-vlm-input-prep

mamba2

martinigoyanes-fix-frequency-penalty

medusa

megatron

message-more-info

mi300-temp

mllama

model_compat_log

more_logs

multi-lora

new_minor_version

nix/cargo-clippy

nix/docker2

nix/pytorch-2.5.1

nix_integration_tests

nix_test2

no_root_user

no_root_user2

op-compilation-benchmarking

origin/slind_window_fix

osanseviero-patch-1

patch_version_3.3.6

pip-installable

pr-1869-ci-run

pr-2076-ci-run

pr-2290-ci-runner

pr-2366-ci-branch

pr-2444-ci-branch

pr-2517-ci-branch

pr-2711-ci-branch

pr-2784-ci-branch

pr-2840-ci-branch

pr-2954-ci-branch

pr-3002-ci-branch

pr-3004-ci-branch

pr-3018-ci-branch

precompile-kernels-workflow

prefix_chunk

prefix_default

proxy_sse_engine_state

quantization

quantization-0.1

refactor-lora-linear

release-3.2.4

remove_post_load_weights

response-header-metrics

revert

rocm-ci-build

rocm_6.2_fixes

router-grammar-compile

s3-cache

self-generating-docs

set-num-blocks

simpler_exllama

skip-mistral-test

speculative

streaming_conceptual

support-granite-vision

support-logit-bias-in-chat

support-phi-model

support-phi3-small

support-pre-compile-kernels

temp_work

test-batch-speedup-amount

test_docs

test_rocm

tmp_invariants

tmp_medusa

tmp_torch_compile

transformers-ci

triton_fix

trtllm-stop-words

trtllm/executor_stats

tuna

update-jsonschema

update_docs2

update_internal_version

update_peft

update_readme

upgrade-outlines

upgrade_mlp_speculator

use_g6

use_updated_kernels

vllm/setup

zstd

#1

#100

#101

#1010

#1018

#1019

#102

#1022

#1023

#1024

#103

#1033

#1034

#1042

#1044

#1045

#1048

#1049

#1052

#1054

#1058

#1059

#106

#1060

#1061

#1063

#1064

#1065

#1066

#1068

#107

#1070

#1071

#1075

#1076

#1077

#108

#1080

#1081

#1089

#109

#1090

#1091

#1092

#1094

#1096

#1097

#1099

#11

#110

#1100

#1101

#1102

#1103

#1105

#1110

#1112

#1116

#1123

#1128

#1134

#114

#1140

#1141

#115

#1153

#1155

#116

#1165

#1165

#117

#1173

#1176

#1178

#1179

#118

#1182

#1183

#1184

#1187

#119

#1198

#1202

#1211

#1214

#1219

#122

#1224

#1228

#123

#1239

#1241

#1242

#1243

#1246

#1252

#126

#1260

#1267

#1270

#1272

#1274

#1276

#1279

#128

#1285

#1287

#129

#1294

#1295

#13

#130

#1301

#1305

#1307

#1308

#1313

#132

#1326

#1328

#133

#1336

#1337

#134

#1341

#1343

#1346

#1347

#1348

#135

#1351

#1352

#1353

#1358

#136

#1361

#1364

#137

#1370

#1373

#138

#1381

#1386

#139

#1390

#1395

#14

#140

#1408

#141

#1414

#1419

#142

#1420

#1424

#1425

#1427

#1428

#143

#1436

#144

#1442

#1448

#145

#1450

#1453

#1454

#1455

#1459

#1461

#1462

#1463

#1469

#147

#1470

#1471

#1473

#1475

#1476

#1477

#1478

#148

#1480

#1484

#1486

#1488

#1489

#149

#1490

#1491

#1492

#1494

#1495

#1496

#1497

#1498

#15

#150

#1502

#1504

#1505

#1506

#151

#1511

#1512

#1514

#1515

#1516

#1517

#1518

#152

#1520

#1523

#1524

#1526

#1527

#153

#1532

#1533

#1537

#1539

#154

#1540

#1541

#1542

#1543

#155

#1550

#1552

#1555

#1556

#1557

#1560

#1563

#1564

#1567

#1568

#1569

#1570

#1571

#1576

#1577

#1578

#1579

#1580

#1583

#1584

#1585

#1586

#1587

#1588

#159

#1591

#1592

#1594

#16

#160

#1603

#1605

#1606

#1607

#1608

#1609

#161

#1610

#1614

#1617

#1618

#1619

#162

#1621

#1626

#1628

#163

#1632

#1637

#1638

#1639

#164

#1646

#1648

#1650

#1651

#1653

#1658

#1660

#1662

#1663

#1664

#1666

#1667

#1668

#167

#1676

#168

#1682

#1685

#1686

#1693

#1697

#1698

#17

#170

#1702

#1703

#1704

#1707

#1708

#1709

#1710

#1713

#1714

#1715

#1716

#1718

#1719

#1726

#1727

#1729

#173

#1730

#1731

#1734

#1735

#1736

#1737

#1739

#174

#1740

#1747

#1748

#1749

#175

#1751

#1755

#1756

#1758

#1759

#1760

#1764

#1765

#1767

#1768

#1773

#1774

#1775

#178

#1784

#1789

#179

#1790

#1791

#1797

#1798

#18

#180

#1800

#1801

#1808

#181

#1811

#1812

#1813

#1815

#1816

#1817

#1818

#1820

#1825

#1827

#1828

#1829

#183

#1830

#1832

#1833

#1835

#1836

#1839

#184

#1840

#1841

#1843

#1844

#1845

#1848

#1849

#185

#1850

#1851

#1854

#1855

#186

#1860

#1865

#1866

#1869

#187

#1882

#1884

#1886

#1888

#1889

#1890

#1892

#1894

#1895

#1898

#19

#190

#1902

#1906

#1908

#1909

#191

#1910

#1912

#1915

#1916

#1917

#1918

#1919

#1920

#1921

#1923

#1924

#1925

#1929

#193

#1931

#1932

#1934

#1935

#1936

#1937

#1938

#1939

#194

#1940

#1942

#1947

#1948

#1949

#1950

#1951

#1953

#1954

#1958

#1959

#196

#1963

#1965

#1967

#1970

#1971

#1975

#1980

#1981

#1985

#1986

#1988

#1989

#1990

#1994

#1995

#1996

#2

#20

#200

#2002

#2003

#2004

#2005

#2006

#2008

#201

#2010

#2011

#2013

#2014

#2015

#2016

#2017

#202

#2020

#2021

#2022

#2023

#2024

#2028

#203

#2031

#2032

#2033

#2034

#2038

#2039

#2044

#2045

#2046

#2047

#2049

#205

#2050

#2052

#2054

#2059

#2060

#2061

#2062

#2063

#2065

#2066

#2068

#207

#2071

#2072

#2074

#2075

#2076

#2078

#2079

#208

#2080

#2084

#2085

#2086

#2087

#2088

#2089

#2091

#2092

#2094

#2095

#2097

#2099

#210

#2101

#2102

#2103

#2104

#2105

#2109

#2110

#2111

#2114

#2115

#2116

#2117

#2118

#2119

#212

#2120

#2123

#2124

#2125

#2127

#2128

#2129

#213

#2131

#2132

#2133

#2134

#2135

#2137

#2138

#214

#2140

#2141

#2142

#2148

#2149

#215

#2151

#2152

#2153

#2155

#2156

#2158

#216

#2161

#2163

#2164

#2165

#2166

#2167

#2168

#2169

#217

#2170

#2173

#2175

#2176

#2178

#2179

#218

#2180

#2181

#2182

#2186

#2187

#2189

#219

#2190

#2191

#2193

#2194

#2196

#22

#220

#2201

#2202

#2203

#2204

#2205

#2208

#2209

#221

#2212

#2213

#2215

#2216

#2217

#222

#2220

#2221

#2224

#2225

#2226

#2228

#2230

#2231

#2232

#2233

#2237

#2242

#2243

#2244

#2245

#2248

#2249

#2250

#2251

#2254

#2255

#2256

#2257

#2258

#2259

#226

#2260

#2261

#2262

#2266

#2268

#2269

#227

#2271

#2272

#2273

#2274

#2276

#2277

#2278

#2279

#228

#2281

#2282

#2283

#2284

#2285

#2286

#2287

#2288

#2289

#2290

#2291

#2292

#2298

#2299

#23

#2300

#2303

#2304

#2306

#2307

#2308

#2309

#2311

#2311

#2313

#2314

#2315

#2317

#2320

#2323

#2325

#2326

#2327

#2328

#2329

#233

#2330

#2331

#2333

#2335

#2336

#2337

#2338

#2339

#234

#2341

#2342

#2343

#2344

#2345

#2346

#2347

#235

#2350

#2351

#2352

#2353

#2354

#2357

#2358

#2359

#2360

#2361

#2364

#2365

#2366

#2367

#2368

#237

#2370

#2371

#2372

#2374

#2377

#2378

#2379

#2381

#2382

#2384

#2385

#2386

#2387

#2389

#2390

#2391

#2392

#2394

#2395

#2396

#2397

#2398

#2399

#24

#2401

#2402

#2403

#2404

#2405

#2406

#2407

#2408

#2410

#2411

#2412

#2414

#2415

#2416

#2419

#242

#2420

#2422

#2423

#2424

#2426

#2427

#2428

#2429

#2430

#2431

#2433

#2437

#2438

#2439

#244

#2442

#2443

#2444

#2449

#2450

#2451

#2453

#2454

#2455

#2459

#246

#2462

#2463

#2468

#2469

#2470

#2471

#2472

#2473

#2477

#2478

#2479

#248

#2481

#2482

#2484

#2486

#2489

#2490

#2491

#2492

#2493

#2494

#2496

#2497

#2498

#2499

#25

#250

#2500

#2501

#2507

#251

#2510

#2511

#2512

#2513

#2514

#2515

#2516

#2517

#2518

#2519

#252

#2520

#2521

#2524

#2525

#2527

#2528

#2529

#2532

#2533

#2535

#2536

#2537

#2538

#2539

#2540

#2545

#2546

#2547

#2548

#255

#2550

#2551

#2552

#2553

#2554

#2555

#2556

#2557

#2558

#2561

#2562

#2563

#2566

#2567

#2568

#257

#2574

#2575

#2577

#2578

#2579

#258

#2580

#2582

#2585

#2586

#2587

#2588

#2589

#259

#2590

#2591

#2592

#2594

#2595

#2596

#2597

#2599

#26

#2600

#2601

#2602

#2603

#2604

#2605

#2606

#2607

#2609

#261

#2610

#2611

#2612

#2614

#2616

#2617

#2619

#262

#2620

#2622

#2623

#2625

#2627

#2628

#2629

#2630

#2631

#2632

#2633

#2634

#2637

#264

#2640

#2642

#2642

#2645

#2646

#2647

#2648

#2650

#2651

#2652

#2655

#2658

#2659

#266

#2661

#2663

#2664

#2665

#2666

#2668

#267

#2673

#2674

#2677

#2678

#2680

#2682

#2683

#2684

#2685

#2686

#2687

#2688

#2689

#269

#2690

#2691

#2692

#2693

#2694

#2695

#2697

#2698

#2699

#27

#2701

#2702

#2704

#2706

#2707

#2708

#2709

#2710

#2711

#2712

#2713

#2714

#2716

#2717

#2718

#2719

#272

#2720

#2721

#2723

#2724

#2725

#2726

#2732

#2733

#2738

#274

#2740

#2741

#2742

#2743

#2745

#2746

#275

#2750

#2751

#2753

#2754

#2755

#2756

#2756

#2758

#276

#2760

#2761

#2762

#2764

#2765

#2766

#2767

#2768

#2769

#277

#2770

#2771

#2772

#2773

#2774

#2777

#2778

#2779

#278

#2782

#2784

#2785

#2786

#2788

#2789

#2790

#2791

#2792

#2793

#2795

#2796

#2797

#2798

#2799

#28

#2802

#2803

#2806

#2807

#2808

#2809

#2810

#2811

#2812

#2814

#2815

#2816

#2817

#2818

#282

#2824

#2825

#2826

#2827

#2829

#2831

#2833

#2834

#2837

#284

#2840

#2841

#2845

#2846

#2848

#2848

#2849

#285

#2850

#2852

#2855

#2856

#2858

#2859

#286

#2861

#2862

#2863

#2866

#2866

#287

#2870

#2874

#2874

#2878

#2882

#2883

#2884

#2885

#2886

#2891

#2892

#2893

#2898

#29

#2901

#2903

#2904

#2905

#2907

#2908

#2910

#2911

#2912

#2913

#2916

#2917

#2918

#2919

#292

#2921

#2922

#2924

#2928

#2929

#2931

#2932

#2933

#2935

#2936

#2937

#2938

#294

#2940

#2941

#2942

#2943

#2945

#2947

#2948

#2949

#2950

#2951

#2953

#2954

#2957

#2958

#2960

#2961

#2962

#2963

#2964

#2964

#2965

#2965

#2966

#2967

#2968

#297

#2970

#2971

#2972

#2975

#2976

#2976

#2977

#2979

#298

#2980

#2981

#2982

#2983

#2988

#299

#2990

#2991

#2992

#2993

#2995

#2996

#2998

#2999

#30

#3000

#3001

#3002

#3003

#3004

#3006

#3007

#3008

#3010

#3012

#3012

#3013

#3015

#3016

#3017

#3018

#302

#3021

#3022

#3024

#3027

#3028

#3029

#303

#3030

#3031

#3032

#3033

#3034

#3034

#3035

#3036

#3037

#3039

#304

#3040

#3041

#3042

#3044

#3045

#3046

#3047

#3048

#3049

#305

#3051

#3053

#3054

#3055

#3056

#3057

#3060

#3061

#3063

#3063

#3064

#3064

#3065

#3065

#3067

#3068

#3069

#3072

#3073

#3074

#3075

#3076

#3077

#3078

#3079

#3079

#308

#3080

#3081

#3083

#3084

#3085

#3086

#3089

#3091

#3092

#3093

#3094

#3095

#3096

#3098

#3099

#31

#310

#3100

#3101

#3103

#3106

#3107

#3108

#3109

#3111

#3111

#3112

#3113

#3116

#3117

#3118

#3120

#3121

#3122

#3124

#3125

#3126

#3127

#3128

#3129

#313

#3131

#3132

#3134

#3136

#3141

#3142

#3144

#3145

#3147

#3148

#3150

#3151

#3152

#3154

#3154

#3156

#3157

#3160

#3161

#3162

#3162

#3164

#3164

#3166

#3166

#3167

#317

#3170

#3171

#3172

#3174

#3176

#3179

#318

#3182

#3184

#3186

#3186

#3187

#3188

#3189

#3193

#3194

#3194

#3196

#3197

#3197

#32

#3200

#3201

#3201

#3202

#3204

#3206

#3207

#3210

#3210

#3211

#3217

#3218

#3220

#3221

#3222

#3223

#3224

#3229

#3230

#3231

#3231

#3232

#3234

#3235

#3236

#3237

#3238

#3239

#3240

#3242

#3244

#3245

#3246

#3249

#325

#3252

#3253

#3254

#3255

#3255

#3256

#3258

#3260

#3261

#3262

#3263

#3264

#3265

#3266

#3266

#3267

#3268

#3269

#327

#3270

#3271

#3273

#3274

#3276

#3276

#328

#3280

#3281

#3282

#3283

#3284

#3286

#3287

#3288

#3288

#329

#3291

#3291

#3292

#3292

#3297

#3298

#33

#3300

#3300

#3302

#3302

#3308

#3310

#3310

#3311

#3312

#3313

#3314

#3315

#3315

#3319

#3322

#3323

#3324

#3325

#3326

#3327

#334

#335

#336

#34

#340

#341

#343

#344

#348

#35

#351

#352

#353

#356

#357

#358

#359

#36

#360

#362

#363

#364

#367

#368

#37

#370

#373

#379

#384

#385

#388

#39

#393

#394

#395

#396

#4

#40

#400

#404

#406

#407

#41

#411

#412

#42

#434

#438

#44

#441

#443

#45

#453

#46

#462

#465

#47

#470

#472

#475

#477

#48

#480

#483

#485

#488

#49

#498

#5

#50

#501

#502

#51

#513

#514

#516

#519

#52

#520

#521

#522

#525

#529

#53

#534

#54

#543

#544

#545

#55

#550

#553

#557

#558

#56

#561

#562

#567

#57

#575

#578

#579

#58

#580

#581

#582

#583

#585

#586

#587

#588

#59

#590

#595

#596

#6

#60

#600

#605

#608

#609

#61

#611

#616

#617

#618

#619

#62

#621

#623

#624

#626

#63

#630

#633

#634

#635

#639

#64

#642

#643

#647

#648

#659

#66

#661

#664

#665

#666

#67

#670

#671

#678

#68

#684

#689

#698

#7

#70

#704

#708

#71

#712

#713

#715

#719

#72

#721

#723

#725

#727

#73

#733

#737

#738

#740

#741

#743

#745

#746

#748

#75

#750

#76

#761

#762

#767

#768

#770

#773

#783

#785

#789

#791

#793

#794

#795

#797

#798

#799

#8

#803

#805

#806

#809

#810

#812

#82

#820

#821

#822

#823

#829

#831

#836

#838

#84

#842

#848

#85

#851

#852

#853

#854

#858

#86

#860

#862

#867

#868

#87

#872

#88

#881

#884

#886

#889

#89

#892

#893

#898

#9

#90

#900

#901

#905

#906

#91

#910

#911

#918

#921

#93

#930

#932

#935

#94

#941

#947

#95

#950

#951

#953

#954

#957

#958

#96

#963

#964

#966

#968

#97

#971

#977

#981

#986

#989

#990

#993

#994

#999

v0.2.0

v0.2.1

v0.3.0

v0.3.1

v0.3.2

v0.4.0

v0.4.1

v0.4.2

v0.4.3

v0.5.0

v0.6.0

v0.7.0

v0.8.0

v0.8.1

v0.8.2

v0.9.0

v0.9.1

v0.9.2

v0.9.3

v0.9.4

v1.0.0

v1.0.1

v1.0.2

v1.0.3

v1.1.0

v1.1.1

v1.2.0

v1.3.0

v1.3.1

v1.3.2

v1.3.3

v1.3.4

v1.4.0

v1.4.1

v1.4.2

v1.4.3

v1.4.4

v1.4.5

v2.0.0

v2.0.1

v2.0.2

v2.0.3

v2.0.4

v2.1.0

v2.1.1

v2.2.0

v2.3.0

v2.3.1

v2.4.0

v2.4.1

v3.0.0

v3.0.1

v3.0.2

v3.1.0

v3.1.1

v3.2.0

v3.2.1

v3.2.2

v3.2.3

v3.3.0

v3.3.1

v3.3.2

v3.3.3

v3.3.4

v3.3.5

dfb801ff0f fix FlashDecoding change's regression in intel platform install triton because GPTQParams needs it. Wang, Yi A 2024-07-01 21:37:40 -0700
4327210e6b

[Major Change][Undecided yet] Move to FlashDecoding instead of PagedAttention kernel. (#1940) Nicolas Patry 2024-07-01 23:28:00 +0200
4f55f15840

Fixing baichuan override. (#2158) Nicolas Patry 2024-07-01 23:25:54 +0200
83f61d6d7d Fixing baichuan override. Nicolas Patry 2024-07-01 21:05:19 +0000
88e2a6a23a fix: avoid loading mistral adapters in mixtral fix-mixtral-adapter-loading drbh 2024-07-01 19:49:05 +0000
d9c7f69888 Add support for manually triggering a release build Daniël de Kok 2024-07-01 14:24:48 +0200
d8c459ecc0 fix: use the base layers weight in mistral rocm drbh 2024-07-01 16:42:45 +0000
1c7c21d596 No need to recreate anything actually. Nicolas Patry 2024-07-01 16:37:36 +0000
ef8bce0b41 Fixup mistral clamping (had issues with cuda graphs). Nicolas Patry 2024-07-01 16:31:22 +0000
b686f66727 Fixing Mi{s,x}tral (non functional in Flash Decoding mode though). Nicolas Patry 2024-07-01 16:16:21 +0000
6dc98abe46 Remove unused parameters annd force tokenizer name to be set Morgan Funtowicz 2024-07-01 16:11:59 +0200
9895e8db99 Add more representative Llama GPTQ test Daniël de Kok 2024-07-01 14:08:44 +0200
47ac5c654d Working FFI call for TGI and TRTLLM backend Morgan Funtowicz 2024-07-01 15:53:23 +0200
1bd52157d8 Update mistral past. Nicolas Patry 2024-07-01 13:19:26 +0000
8fa8cda660 Changing return everywhere. Nicolas Patry 2024-07-01 12:08:59 +0000
a26e57f9f3 Fixing non flash tests/imports. Nicolas Patry 2024-07-01 11:54:34 +0000
4b1364da92 Factoring cu_seqlen_qk for better abstracting over every model. Nicolas Patry 2024-07-01 10:55:00 +0000
65980ed75a These do not belong. Nicolas Patry 2024-06-25 15:06:52 +0000
5f38d79719 "ipex" -> "cpu" Nicolas Patry 2024-06-25 14:24:28 +0000
212a59544b Update? Nicolas Patry 2024-06-25 13:10:20 +0000
fcbc6876c0 No need for cache_manager anymore. Nicolas Patry 2024-06-25 12:24:45 +0000
4f1b1a277c Rebased. Nicolas Patry 2024-06-25 12:20:50 +0000
988aa34f3d Fix non decoding paths. Nicolas Patry 2024-05-31 22:56:31 +0000
b98b94d695 Fix Cohere. Nicolas Patry 2024-05-31 22:54:43 +0000
66081e6ae7 Making it work on non flash decoding. Nicolas Patry 2024-05-31 21:41:19 +0000
4293a12863 Using flash decoding Nicolas Patry 2024-05-17 08:43:33 +0000
d0225b1015

GH router. (#2153) Nicolas Patry 2024-07-01 15:42:26 +0200
466d4cef48 GH router. Nicolas Patry 2024-07-01 13:28:01 +0000
17cebc4506

Fixing test. (#2152) Nicolas Patry 2024-07-01 15:24:17 +0200
b85bb02b86 Fixing test. Nicolas Patry 2024-07-01 13:23:17 +0000
9eefb2f672

fix: prefer serde structs over custom functions (#2127) drbh 2024-07-01 09:08:05 -0400
5da4cfab1c

refine get xpu free memory/enable Qwen2/gemma2/gemma/phi in intel platform (#2132) Wang, Yi 2024-07-01 20:32:54 +0800
e0bfe4e7f0 fix Felix Marty 2024-07-01 12:31:56 +0000
afe9d74337 Fixing the post processor. Nicolas Patry 2024-07-01 12:29:20 +0000
750ef7bc23 Merge branch 'ci_amd3' of github.com:huggingface/text-generation-inference into ci_amd3 Felix Marty 2024-07-01 12:20:40 +0000
00cc73b7b7 fix post merge Felix Marty 2024-07-01 12:20:29 +0000
9d0ca503a8

fix AttributeError: 'MixtralLayer' object has no attribute 'mlp' (#2123) icyboy™ 2024-07-01 20:17:22 +0800
59849777de Merge branch 'main' into ci_amd3 fxmarty 2024-07-01 14:14:46 +0200
9fd395fae4 fix tests Felix Marty 2024-07-01 12:12:26 +0000
153c8ae60f

Merge branch 'main' into prefer-chat-object-enum Nicolas Patry 2024-07-01 14:10:45 +0200
2ce8019480

Use GPTQ-Marlin for supported GPTQ configurations (#2111) Daniël de Kok 2024-07-01 12:59:12 +0200
0d97a93c1e

feat: download lora adapter weights from launcher (#2140) drbh 2024-07-01 06:58:49 -0400
25f57e2e98

fix: use weights from base_layer (#2141) drbh 2024-07-01 06:58:40 -0400
b4552f9de9

Fixing clippy. (#2149) Nicolas Patry 2024-07-01 12:02:19 +0200
50da0ce75f

Fixing clippy. Nicolas Patry 2024-07-01 12:01:22 +0200
6ea570ddfe

fix microsoft/Phi-3-mini-4k-instruct crash in batch.slots[batch.slot_… (#2148) Wang, Yi 2024-07-01 17:27:53 +0800
18d978ba0f

Apply suggestions from code review Nicolas Patry 2024-07-01 11:27:42 +0200
81d0def84a fix microsoft/Phi-3-mini-4k-instruct crash in batch.slots[batch.slot_indices] Wang, Yi A 2024-07-01 00:38:20 -0700
dc402dc9ac Initial setup for CXX binding to TRTLLM Morgan Funtowicz 2024-06-30 23:37:20 +0200
45da4460a3 change name to info routes Kevin Duffy 2024-06-28 18:46:07 +0100
b3e21ed42e Add API_Key for Auth and conditionally add authorisation for non info/health endpoints. Kevin Duffy 2024-06-28 18:41:21 +0100
05d1011b4f fix xpu build Felix Marty 2024-06-28 16:08:27 +0000
a00db1b474 fix: adjust unwrap syntax in template drbh 2024-06-28 15:22:15 +0000
68583d3240 working memory leak fix in tunableop Felix Marty 2024-06-28 15:15:12 +0000
8885688630 fix: update create_post_processor logic for token type drbh 2024-06-28 15:07:50 +0000
c4feb9854c fix: use weights from base_layer drbh 2024-06-28 14:49:41 +0000
c326ffdac0 fix: adjust HubTokenizerConfig after rebase drbh 2024-06-27 11:33:29 -0400
d759a7f492 feat: leverage serde for conditional deser drbh 2024-06-27 15:11:14 +0000
4ba5e74efc fix: adjust typo drbh 2024-06-27 13:11:19 +0000
ae14f8931e fix: enum CompletionType not ObjectType drbh 2024-06-27 13:07:05 +0000
39c6d10b5a fix: adjust typo drbh 2024-06-26 22:56:36 +0000
f98f498473 fix: prefer enum for chat object drbh 2024-06-26 22:54:00 +0000
1c0b916e63 feat: download lora adapter weights from launcher drbh 2024-06-28 14:26:03 +0000
9815feb2e3 Revert "Update devcontainer to use correct update content command path" backends/trtllm Morgan Funtowicz 2024-06-28 15:26:45 +0200
b67073df41 Update devcontainer to use correct update content command path Morgan Funtowicz 2024-06-28 15:22:54 +0200
8e25428713 Update devcontainer to remove clang and base image on PyTorch Morgan Funtowicz 2024-06-28 15:16:10 +0200
3d50ff71b7 bump torch to more recent version Felix Marty 2024-06-28 13:10:43 +0000
f3e729a6d6 Add devcontainer to ease backend development Morgan Funtowicz 2024-06-28 14:39:19 +0200
87db820627 fix rm Felix Marty 2024-06-28 09:49:20 +0000
fb98ab273f

Fixing the CI to also run in release when it's a tag ? (#2138) Nicolas Patry 2024-06-28 09:31:09 +0200
488ddee64d

Fixing the CI to also run in release when it's a tag ? Nicolas Patry 2024-06-28 08:53:14 +0200
192d49af0b

2.1.0 names for release. v2.1.0 git_v2.1.0 Nicolas Patry 2024-06-28 08:20:59 +0200
36077d8ff9 enable gemma/gemma2/phi in intel platform Wang, Yi A 2024-06-27 19:33:17 -0700
af16320e66 Merge branch 'main' into mem_refine Wang, Yi A 2024-06-27 19:12:42 -0700
74b0231b19

fix: refactor post_processor logic and add test (#2137) drbh 2024-06-27 17:16:19 -0400
a921854d92 fix: adjust when post_processor is overridden and improve create_post_processor drbh 2024-06-27 20:47:06 +0000
74535ce80f fix: remove dev comment drbh 2024-06-27 18:42:41 +0000
f85cd58e2c fix: refactor post_processor logic and add test drbh 2024-06-27 18:34:26 +0000
eaa6890b3c remove hidden Felix Marty 2024-06-27 15:24:14 +0000
0a5485d8a0 avoid permissions issues Felix Marty 2024-06-27 14:51:11 +0000
3ea8259af1

Fixing gemma2. (#2135) Nicolas Patry 2024-06-27 16:04:20 +0200
0e4ab6d31c

Fixing malformed rust tokenizers (#2134) Nicolas Patry 2024-06-27 16:04:03 +0200
aeeb291ffa Fix for deepseek too. Nicolas Patry 2024-06-27 13:56:20 +0000
dd2d91b043

Idefics2: sync added image tokens with transformers (#2080) Daniël de Kok 2024-06-27 15:54:35 +0200
bbc949ff74 trigger ci Felix Marty 2024-06-27 13:47:21 +0000
80b448c2bb Idefics2: sync added image tokens with transformers Daniël de Kok 2024-06-20 09:21:58 +0200
ded50f900d Adding new model. Nicolas Patry 2024-06-27 13:39:20 +0000
02ac45131f some cleaning automodel-supports-flash-paged-attention Felix Marty 2024-06-27 13:33:35 +0000
3760102077 add missing files Felix Marty 2024-06-27 13:30:40 +0000
aa87939774 Fixing malformed rust tokenizers Nicolas Patry 2024-06-27 13:30:32 +0000
770975fa81 refactor Felix Marty 2024-06-27 13:24:58 +0000
6982f9bcb1 enable qwen2 in xpu Wang, Yi A 2024-06-27 06:01:07 -0700
cb37c551ab working flash + paged through transformers Felix Marty 2024-06-27 12:39:36 +0000
886bfab23d refine get xpu free memory Wang, Yi A 2024-06-27 05:18:57 -0700
91423771be Missing dependency Morgan Funtowicz 2024-06-27 12:43:40 +0200
4335a39f92 First definition of binding trtllm to rust Morgan Funtowicz 2024-06-27 12:41:49 +0200
c6537df493 enable build cmake binding Morgan Funtowicz 2024-06-27 12:41:36 +0200
b53b21c63a

Bumping to 2.1 (#2131) Nicolas Patry 2024-06-27 12:34:43 +0200
d8185ad942

Bumping to 2.1 Nicolas Patry 2024-06-27 11:56:21 +0200
2e763d12ad Use GPTQ-Marlin for supported GPTQ configurations Daniël de Kok 2024-06-24 15:11:49 +0200