Commit Graph

  • 17a4ee92b0 fix: avoid cargo lock tweak drbh 2024-06-14 09:25:41 -0400
  • 210b9cc717 fix: avoid cargo lock changes drbh 2024-06-14 09:20:52 -0400
  • 96b7b40ca3 Update the link for qwen2 (#2068) Tiezhen WANG 2024-06-14 17:59:33 +0800
  • f5c10d4174 Support different image sizes in prefill in VLMs Daniël de Kok 2024-06-13 15:06:29 +0200
  • 093a27c528 Add support for GPTQ Marlin (#2052) Daniël de Kok 2024-06-14 09:45:42 +0200
  • af5041a7cf Fix too eager staging Daniël de Kok 2024-06-14 09:40:52 +0200
  • 332e16db26 Fix Qwen2 model URL in model table Daniël de Kok 2024-06-14 09:32:08 +0200
  • f085355fbe Update the link for qwen2 Tiezhen WANG 2024-06-14 14:35:23 +0800
  • 7396248379 Update server/text_generation_server/utils/import_utils.py Wang, Yi 2024-06-14 08:48:39 +0800
  • aa88c4fd3a fix: add lora kernel to dockerfile, support running without kernels and refactors drbh 2024-06-14 00:35:07 +0000
  • f433f1f770 implement Open Inference Protocol endpoints (#1942) drbh 2024-06-13 12:51:51 -0400
  • 42aa8ee1bb PR #2049 CI run (#2054) drbh 2024-06-13 11:53:49 -0400
  • 5d2b93ba42 Fixup residual, initial block attention config feature/phi-3-small Daniël de Kok 2024-06-13 10:38:56 +0200
  • 64182534b6 debug debug-gpt2 Felix Marty 2024-06-13 07:48:18 +0000
  • 31b8cc4386 debug Felix Marty 2024-06-13 07:41:46 +0000
  • 8f1de30b0f debug Felix Marty 2024-06-13 07:31:11 +0000
  • b3e9a13e27 fix idefics2 tests Felix Marty 2024-06-13 07:09:48 +0000
  • 60ee0b5178 Add support for GPTQ Marlin kernels Daniël de Kok 2024-06-12 13:58:36 +0200
  • 98adc45401 fix typo Stefan Daniel Schwarz 2024-06-13 00:46:35 +0200
  • 86b42f5f6d docker-compose Stefan Daniel Schwarz 2024-06-12 23:35:40 +0200
  • abe521204e fix tests OlivierDehaene 2024-06-12 18:54:25 +0200
  • 05eb4dcb17 allocate 16 by 16 OlivierDehaene 2024-06-12 18:53:14 +0200
  • 90184df79c fix(layers): fix SuRotaryEmbedding (#2060) OlivierDehaene 2024-06-12 18:24:47 +0200
  • 521de6cacd fix(server): fix OPT implementation (#2061) OlivierDehaene 2024-06-12 18:22:20 +0200
  • bbebdffa6a fix(server): fix OPT implementation OlivierDehaene 2024-06-12 18:11:27 +0200
  • 82302262ca remove logs OlivierDehaene 2024-06-12 17:50:53 +0200
  • 9775facbf7 change arange OlivierDehaene 2024-06-12 17:47:46 +0200
  • 9cc16725bf fix(layers): fix SuRotaryEmbedding OlivierDehaene 2024-06-12 17:09:13 +0200
  • 4ed551abba WIP, many bits are still missing... Daniël de Kok 2024-06-12 17:03:55 +0200
  • c0f201c9d3 Factor out sharding of packed tensors Daniël de Kok 2024-06-12 16:20:51 +0200
  • 3bf8e8e466 Merge pull request #158 from kdamaszk/rebase-tgi-2-0-2 regisss 2024-06-12 15:48:31 +0200
  • 9ac7b7bc52 remove slots from grpc OlivierDehaene 2024-06-12 11:50:31 +0200
  • ed1d28731b add CPU tgi support Wang, Yi A 2024-06-11 17:56:50 -0700
  • 884ebabfd3 fix: cargo fmt lint for pre commit drbh 2024-06-11 18:46:30 +0000
  • c2fb459bc1 fix windowing OlivierDehaene 2024-06-11 18:40:38 +0200
  • 37266e2dbb fix rust and python unit-tests OlivierDehaene 2024-06-11 17:11:16 +0200
  • e6e87a2e26 Use minijinja's pycompat mode for python methods Armin Ronacher 2024-06-11 11:56:05 +0200
  • 376a0b7ada Support chat response format (#2046) drbh 2024-06-11 10:44:56 -0400
  • 7c7470542d fix tests fxmarty 2024-06-11 13:40:35 +0000
  • a6e4d63c86 Update LLMM1 bound (#2050) fxmarty 2024-06-11 13:30:29 +0200
  • 7ee9c1af51 update commit fxmarty 2024-06-11 11:26:04 +0000
  • dadfff621e update fxmarty 2024-06-11 11:25:14 +0000
  • 73c3903214 FlashCausalLM implem OlivierDehaene 2024-06-11 12:38:07 +0200
  • 6983ec9537 small refactor OlivierDehaene 2024-06-10 11:44:50 +0200
  • 713d70b443 re-working logic, wip OlivierDehaene 2024-06-07 13:39:42 +0200
  • 298bf31e69 add terminated_generations OlivierDehaene 2024-06-07 11:26:17 +0200
  • 3c596983ba fix python tests OlivierDehaene 2024-06-06 10:18:26 +0200
  • 51fa606875 fix OlivierDehaene 2024-06-05 21:32:46 +0200
  • 35f27cbcc1 working example OlivierDehaene 2024-06-05 18:47:16 +0200
  • 1cc86930a6 wip OlivierDehaene 2024-06-05 17:01:06 +0200
  • 18e77a5cc7 wip OlivierDehaene 2024-06-05 15:28:10 +0200
  • a4bebdc281 Use minijinja's pycompat mode for python methods Armin Ronacher 2024-06-11 11:56:05 +0200
  • 73b067d193 skip exl2 tests on rocm fxmarty 2024-06-11 09:29:08 +0000
  • b452620c04 fix gptq tests, LLMM1 matrix bound fxmarty 2024-06-11 07:27:14 +0000
  • b0c0be48cf use xpu-smi to dump used memory xpu use "ZE_AFFINITY_MASK" to control card, usage is like CUDA_VISIBLE_DEVICES Wang, Yi A 2024-06-10 19:33:52 -0700
  • 4ce8494ceb fix: add trufflehog lint drbh 2024-06-10 19:16:55 +0000
  • 8c24b1282b fix: adjust typos drbh 2024-06-10 18:59:22 +0000
  • bcf2b29577 feat: support response_format in chat drbh 2024-06-10 18:33:13 +0000
  • dfca1dfc5e fix(ci): remove unnecessary permissions (#2045) Luc Georges 2024-06-10 18:16:53 +0200
  • 992d6c63e0 fix(ci): remove unnecessary permissions Luc Georges 2024-06-10 18:14:22 +0200
  • 4e74ec09a8 feat(ci): add trufflehog secrets detection (#2038) Luc Georges 2024-06-10 17:54:13 +0200
  • 2dc6e70b1a doc: add architecture to toctree Alvaro Moran 2024-06-10 17:23:28 +0200
  • b5704427fe doc: adding architecture document Alvaro Moran 2024-06-07 16:03:58 +0200
  • d6cf63ca53 Update lora.md Derek 2024-06-10 06:56:37 +0400
  • 1be1ebc438 Update lora.md Derek 2024-06-10 06:53:34 +0400
  • ce40ad26fd fix: add model_id to IdeficsCausalLM drbh 2024-06-07 04:36:32 +0000
  • 101b95adc4 fix: update all models forwards to include adapter_data drbh 2024-06-07 03:58:03 +0000
  • 1deb372564 fix: add adapter_data param to phi and neox drbh 2024-06-07 03:28:15 +0000
  • b1169273fd fix: add adapter_data param and avoid missing layers drbh 2024-06-07 03:03:15 +0000
  • 91f407226d feat: support if vlm models drbh 2024-06-07 02:21:06 +0000
  • a563a93113 fix: rename doc to retry ci build drbh 2024-06-07 01:23:52 +0000
  • 611225f017 feat: support base model generation and refactors drbh 2024-06-07 01:20:41 +0000
  • 43ec9dfe32 feat: bump launcher and add new lora docs drbh 2024-06-06 23:49:07 +0000
  • 81707bfbfa fix: include rust code for adapter id drbh 2024-06-06 23:23:17 +0000
  • 68399c1ae3 feat: prefer model id in request drbh 2024-06-06 23:21:10 +0000
  • de56a81c5c feat: add lora support to mistral and refactors drbh 2024-06-06 22:44:58 +0000
  • 9c45d34983 fix: add model_id to model test drbh 2024-06-06 21:24:29 +0000
  • dc0f76553c fix: pass model_id for all causal and seq2seq lms drbh 2024-06-06 21:13:14 +0000
  • 88bd5c2c92 fix: pass model_id for all flash causal lms drbh 2024-06-06 21:02:03 +0000
  • 73eb2ae255 fix: refactor and move changes to v3 proto drbh 2024-06-06 20:31:27 +0000
  • c927376725 fix: adjust adapter_segments logic when in batch drbh 2024-06-06 18:53:18 +0000
  • ad088d51fa fix: adjust batch for bgmv drbh 2024-06-06 17:45:08 +0000
  • 8984ce6c69 feat: perfer loraxs custom punica kernels and add mlp loras drbh 2024-06-06 15:57:00 +0000
  • d5f21d57d1 fix: prefer adapter_data and refactors drbh 2024-06-06 14:35:59 +0000
  • 8b50f4b779 feat: prefer lorax implementation and port loading logic drbh 2024-06-05 23:56:04 +0000
  • c661631225 feat: baseline impl single request multi lora support drbh 2024-06-04 20:07:28 +0000
  • a046c303f7 fix: refactor and reduce lora math drbh 2024-06-04 05:01:52 +0000
  • 0a6ea7fb57 feat: load weights within layer and refactor lora pass drbh 2024-06-04 01:38:43 +0000
  • db3d8e6518 feat: first draft load multiple lora drbh 2024-05-30 19:16:15 +0000
  • a1cbfc16e9 Contributing guide & Code of Conduct Lysandre 2024-06-10 15:47:17 +0200
  • d3c7f63416 Merge branch 'main' into amd-ci-fx amd-ci-fx fxmarty 2024-06-10 15:10:04 +0200
  • de6f2cd08d disable marlin tests on rocm/xpu fxmarty 2024-06-10 13:06:11 +0000
  • 05caaa2f31 fix: split docs and start conceptual page (#1836) drbh 2024-05-01 03:03:25 -0400
  • d9108dd7b2 (chore): torch 2.3.0 (#1833) Nicolas Patry 2024-04-30 18:15:35 +0200
  • e43e511c8d chore: update torch (#1730) OlivierDehaene 2024-04-30 14:04:28 +0200
  • c5e3357293 Handle images in chat api (#1828) drbh 2024-04-30 06:18:32 -0400
  • 5ceaeb9f31 feat: add vlm docs and simple examples (#1812) drbh 2024-04-30 06:14:39 -0400
  • 8c847f2b60 Fixing frequency penalty (#1811) Martin Iglesias Goyanes 2024-04-30 12:13:23 +0200
  • cd72a57123 feat: add how it works section (#1773) drbh 2024-04-30 05:45:49 -0400
  • 91352b1b71 fix: use get_speculate to the number of layers (#1737) OlivierDehaene 2024-04-30 11:45:26 +0200