text-generation-inference

huggingface/text-generation-inference

Fork 0

mirror of https://github.com/huggingface/text-generation-inference.git synced 2025-09-10 03:44:54 +00:00

Commit Graph

Select branches

Hide Pull Requests

20250708-ci-fixes

add-chat-response-format

add-google-cloud-provider

add-quickstart-script

add-rotary-embed-tests

add-small-ttft-script

add-test-for-warmup-and-kvcache

add_L4

add_api_key

add_batch_dimension

add_chunked_atn

add_chunked_attn

add_deepseekv3

add_gptq_docs

add_integration_test

add_readme_dashboard

add_tunable_prefill

add_vlm_chunking

adding_docs

adjust-mllama-test-output

adjust-where-request-max-tokens-is-defaulted

aiter_kernels

amd-ci-fx

auto_length

automodel-supports-flash-paged-attention

avoid-cuda-graph-during-warmup-if-oom

avoid-zero-seed

backends/trtllm

backends/trtllm-executor

baichuan2-13b

bnb4

bugfix/add_tools_prompt

bugfix/moe-kernels-imports

bugfix/phi-exl2

bump-client-0.6.2

bump-kernel-versions

bump-poetry-and-requirements

chunked_attn_l4

ci-amihalik-update-chat-completion-messages

ci-new-cluster

ci-patch

ci-run-openai-function-calling-compatible-support

ci-update_xpu_image

ci-xpu

ci-xpu2

ci2

ci_amd

ci_amd2

ci_amd3

ci_amd4

close_dl_thread

compat_logger

cuda_ipc_allreduce

debug-gpt2

debug-request-id

debug-torch-23

debug/gemma2

debugging-timeouts

deploy/aml

dev

development-guide

dummy

enable-non-grammar-constrained-tools

enable-qwen2vl-video

enable-transformers-vlm

enable_non_divisible_embeddings

exl2

experiment/moe

explore-static-triton-kernels

explore-t4-gemma-issues

feat-backend-llamacpp

feat/add-load-test

feat/attention_sinks

feat/backend_abstraction

feat/backend_feature

feat/better_tokens

feat/cuda_12

feat/flash_decoding

feat/improve_max_tokens

feat/max_queue_size

feat/page_re_alloc

feat/parse_logs

feat/support_deepspeed

feature/machete

feature/moe-kernels

feature/no_repeat_ngram_size

feature/no_repeat_ngram_size_ci

feature/phi-3-small

feature/prefix

feature/radix-prefix-cache

feature/radix-prefix-cache-bench

feature/vlm-prefix-caching

fix-cudagraph-bug

fix-gemma-tokenization

fix-grammar-cleanup-bug

fix-grammar-fsm-batching

fix-mixtral-adapter-loading

fix-release-tests

fix-repack-for-marlin

fix-tool-call-def

fix-tp

fix-version-install

fix/allow-top-p-0

fix/avoid_record_streams

fix/op-trace-id

fix/parse-mamba-config

fix_default_arg

fix_exl2

fix_fp8_llama3.2

fix_leak

fix_mistral2

fix_neox_rotary_emb

fix_phi3

flashinfer

flashinfer-0.2.5

fp8_kvcache

fp8_rocm

gaudi_llama4_tmp

gha_sccache_use_secrets

git_2.0.4

git_3.1.1

git_3.2.0

git_3.2.1

git_3.3.0

git_3.3.1

git_3.3.2

git_v2.1.0

git_v2.1.1

git_v2.2.0

git_v2.3.0

git_v2.3.1

git_v2.4.0

git_v2.4.1

git_v3.0.0

git_v3.0.1

git_v3.0.2

git_v3.1.0

git_v3.2.2

git_v3.2.3

git_v3.3.3

git_v3.3.4

git_v3.3.5

improve-docs

improve-dynamic-message-content

improve-json-schema-field

improve-tool-call-and-response-ids

improve_defaults

improve_launcher_defaults

inlcude-latest-release-on-commit-builds-tags

ipex-moe

kvrouter

kvrouter-endpoints

llama-fused-compiled-mlp

main

maintenance/docker-network

maintenance/merge-vlm-input-prep

mamba2

martinigoyanes-fix-frequency-penalty

medusa

megatron

message-more-info

mi300-temp

mllama

model_compat_log

more_logs

multi-lora

new_minor_version

nix/cargo-clippy

nix/docker2

nix/pytorch-2.5.1

nix_integration_tests

nix_test2

no_root_user

no_root_user2

op-compilation-benchmarking

origin/slind_window_fix

osanseviero-patch-1

patch_version_3.3.6

pip-installable

pr-1869-ci-run

pr-2076-ci-run

pr-2290-ci-runner

pr-2366-ci-branch

pr-2444-ci-branch

pr-2517-ci-branch

pr-2711-ci-branch

pr-2784-ci-branch

pr-2840-ci-branch

pr-2954-ci-branch

pr-3002-ci-branch

pr-3004-ci-branch

pr-3018-ci-branch

precompile-kernels-workflow

prefix_chunk

prefix_default

proxy_sse_engine_state

quantization

quantization-0.1

refactor-lora-linear

release-3.2.4

remove_post_load_weights

response-header-metrics

revert

rocm-ci-build

rocm_6.2_fixes

router-grammar-compile

s3-cache

self-generating-docs

set-num-blocks

simpler_exllama

skip-mistral-test

speculative

streaming_conceptual

support-granite-vision

support-logit-bias-in-chat

support-phi-model

support-phi3-small

support-pre-compile-kernels

temp_work

test-batch-speedup-amount

test_docs

test_rocm

tmp_invariants

tmp_medusa

tmp_torch_compile

transformers-ci

triton_fix

trtllm-stop-words

trtllm/executor_stats

tuna

update-jsonschema

update_docs2

update_internal_version

update_peft

update_readme

upgrade-outlines

upgrade_mlp_speculator

use_g6

use_updated_kernels

vllm/setup

zstd

#1

#100

#101

#1010

#1018

#1019

#102

#1022

#1023

#1024

#103

#1033

#1034

#1042

#1044

#1045

#1048

#1049

#1052

#1054

#1058

#1059

#106

#1060

#1061

#1063

#1064

#1065

#1066

#1068

#107

#1070

#1071

#1075

#1076

#1077

#108

#1080

#1081

#1089

#109

#1090

#1091

#1092

#1094

#1096

#1097

#1099

#11

#110

#1100

#1101

#1102

#1103

#1105

#1110

#1112

#1116

#1123

#1128

#1134

#114

#1140

#1141

#115

#1153

#1155

#116

#1165

#1165

#117

#1173

#1176

#1178

#1179

#118

#1182

#1183

#1184

#1187

#119

#1198

#1202

#1211

#1214

#1219

#122

#1224

#1228

#123

#1239

#1241

#1242

#1243

#1246

#1252

#126

#1260

#1267

#1270

#1272

#1274

#1276

#1279

#128

#1285

#1287

#129

#1294

#1295

#13

#130

#1301

#1305

#1307

#1308

#1313

#132

#1326

#1328

#133

#1336

#1337

#134

#1341

#1343

#1346

#1347

#1348

#135

#1351

#1352

#1353

#1358

#136

#1361

#1364

#137

#1370

#1373

#138

#1381

#1386

#139

#1390

#1395

#14

#140

#1408

#141

#1414

#1419

#142

#1420

#1424

#1425

#1427

#1428

#143

#1436

#144

#1442

#1448

#145

#1450

#1453

#1454

#1455

#1459

#1461

#1462

#1463

#1469

#147

#1470

#1471

#1473

#1475

#1476

#1477

#1478

#148

#1480

#1484

#1486

#1488

#1489

#149

#1490

#1491

#1492

#1494

#1495

#1496

#1497

#1498

#15

#150

#1502

#1504

#1505

#1506

#151

#1511

#1512

#1514

#1515

#1516

#1517

#1518

#152

#1520

#1523

#1524

#1526

#1527

#153

#1532

#1533

#1537

#1539

#154

#1540

#1541

#1542

#1543

#155

#1550

#1552

#1555

#1556

#1557

#1560

#1563

#1564

#1567

#1568

#1569

#1570

#1571

#1576

#1577

#1578

#1579

#1580

#1583

#1584

#1585

#1586

#1587

#1588

#159

#1591

#1592

#1594

#16

#160

#1603

#1605

#1606

#1607

#1608

#1609

#161

#1610

#1614

#1617

#1618

#1619

#162

#1621

#1626

#1628

#163

#1632

#1637

#1638

#1639

#164

#1646

#1648

#1650

#1651

#1653

#1658

#1660

#1662

#1663

#1664

#1666

#1667

#1668

#167

#1676

#168

#1682

#1685

#1686

#1693

#1697

#1698

#17

#170

#1702

#1703

#1704

#1707

#1708

#1709

#1710

#1713

#1714

#1715

#1716

#1718

#1719

#1726

#1727

#1729

#173

#1730

#1731

#1734

#1735

#1736

#1737

#1739

#174

#1740

#1747

#1748

#1749

#175

#1751

#1755

#1756

#1758

#1759

#1760

#1764

#1765

#1767

#1768

#1773

#1774

#1775

#178

#1784

#1789

#179

#1790

#1791

#1797

#1798

#18

#180

#1800

#1801

#1808

#181

#1811

#1812

#1813

#1815

#1816

#1817

#1818

#1820

#1825

#1827

#1828

#1829

#183

#1830

#1832

#1833

#1835

#1836

#1839

#184

#1840

#1841

#1843

#1844

#1845

#1848

#1849

#185

#1850

#1851

#1854

#1855

#186

#1860

#1865

#1866

#1869

#187

#1882

#1884

#1886

#1888

#1889

#1890

#1892

#1894

#1895

#1898

#19

#190

#1902

#1906

#1908

#1909

#191

#1910

#1912

#1915

#1916

#1917

#1918

#1919

#1920

#1921

#1923

#1924

#1925

#1929

#193

#1931

#1932

#1934

#1935

#1936

#1937

#1938

#1939

#194

#1940

#1942

#1947

#1948

#1949

#1950

#1951

#1953

#1954

#1958

#1959

#196

#1963

#1965

#1967

#1970

#1971

#1975

#1980

#1981

#1985

#1986

#1988

#1989

#1990

#1994

#1995

#1996

#2

#20

#200

#2002

#2003

#2004

#2005

#2006

#2008

#201

#2010

#2011

#2013

#2014

#2015

#2016

#2017

#202

#2020

#2021

#2022

#2023

#2024

#2028

#203

#2031

#2032

#2033

#2034

#2038

#2039

#2044

#2045

#2046

#2047

#2049

#205

#2050

#2052

#2054

#2059

#2060

#2061

#2062

#2063

#2065

#2066

#2068

#207

#2071

#2072

#2074

#2075

#2076

#2078

#2079

#208

#2080

#2084

#2085

#2086

#2087

#2088

#2089

#2091

#2092

#2094

#2095

#2097

#2099

#210

#2101

#2102

#2103

#2104

#2105

#2109

#2110

#2111

#2114

#2115

#2116

#2117

#2118

#2119

#212

#2120

#2123

#2124

#2125

#2127

#2128

#2129

#213

#2131

#2132

#2133

#2134

#2135

#2137

#2138

#214

#2140

#2141

#2142

#2148

#2149

#215

#2151

#2152

#2153

#2155

#2156

#2158

#216

#2161

#2163

#2164

#2165

#2166

#2167

#2168

#2169

#217

#2170

#2173

#2175

#2176

#2178

#2179

#218

#2180

#2181

#2182

#2186

#2187

#2189

#219

#2190

#2191

#2193

#2194

#2196

#22

#220

#2201

#2202

#2203

#2204

#2205

#2208

#2209

#221

#2212

#2213

#2215

#2216

#2217

#222

#2220

#2221

#2224

#2225

#2226

#2228

#2230

#2231

#2232

#2233

#2237

#2242

#2243

#2244

#2245

#2248

#2249

#2250

#2251

#2254

#2255

#2256

#2257

#2258

#2259

#226

#2260

#2261

#2262

#2266

#2268

#2269

#227

#2271

#2272

#2273

#2274

#2276

#2277

#2278

#2279

#228

#2281

#2282

#2283

#2284

#2285

#2286

#2287

#2288

#2289

#2290

#2291

#2292

#2298

#2299

#23

#2300

#2303

#2304

#2306

#2307

#2308

#2309

#2311

#2311

#2313

#2314

#2315

#2317

#2320

#2323

#2325

#2326

#2327

#2328

#2329

#233

#2330

#2331

#2333

#2335

#2336

#2337

#2338

#2339

#234

#2341

#2342

#2343

#2344

#2345

#2346

#2347

#235

#2350

#2351

#2352

#2353

#2354

#2357

#2358

#2359

#2360

#2361

#2364

#2365

#2366

#2367

#2368

#237

#2370

#2371

#2372

#2374

#2377

#2378

#2379

#2381

#2382

#2384

#2385

#2386

#2387

#2389

#2390

#2391

#2392

#2394

#2395

#2396

#2397

#2398

#2399

#24

#2401

#2402

#2403

#2404

#2405

#2406

#2407

#2408

#2410

#2411

#2412

#2414

#2415

#2416

#2419

#242

#2420

#2422

#2423

#2424

#2426

#2427

#2428

#2429

#2430

#2431

#2433

#2437

#2438

#2439

#244

#2442

#2443

#2444

#2449

#2450

#2451

#2453

#2454

#2455

#2459

#246

#2462

#2463

#2468

#2469

#2470

#2471

#2472

#2473

#2477

#2478

#2479

#248

#2481

#2482

#2484

#2486

#2489

#2490

#2491

#2492

#2493

#2494

#2496

#2497

#2498

#2499

#25

#250

#2500

#2501

#2507

#251

#2510

#2511

#2512

#2513

#2514

#2515

#2516

#2517

#2518

#2519

#252

#2520

#2521

#2524

#2525

#2527

#2528

#2529

#2532

#2533

#2535

#2536

#2537

#2538

#2539

#2540

#2545

#2546

#2547

#2548

#255

#2550

#2551

#2552

#2553

#2554

#2555

#2556

#2557

#2558

#2561

#2562

#2563

#2566

#2567

#2568

#257

#2574

#2575

#2577

#2578

#2579

#258

#2580

#2582

#2585

#2586

#2587

#2588

#2589

#259

#2590

#2591

#2592

#2594

#2595

#2596

#2597

#2599

#26

#2600

#2601

#2602

#2603

#2604

#2605

#2606

#2607

#2609

#261

#2610

#2611

#2612

#2614

#2616

#2617

#2619

#262

#2620

#2622

#2623

#2625

#2627

#2628

#2629

#2630

#2631

#2632

#2633

#2634

#2637

#264

#2640

#2642

#2642

#2645

#2646

#2647

#2648

#2650

#2651

#2652

#2655

#2658

#2659

#266

#2661

#2663

#2664

#2665

#2666

#2668

#267

#2673

#2674

#2677

#2678

#2680

#2682

#2683

#2684

#2685

#2686

#2687

#2688

#2689

#269

#2690

#2691

#2692

#2693

#2694

#2695

#2697

#2698

#2699

#27

#2701

#2702

#2704

#2706

#2707

#2708

#2709

#2710

#2711

#2712

#2713

#2714

#2716

#2717

#2718

#2719

#272

#2720

#2721

#2723

#2724

#2725

#2726

#2732

#2733

#2738

#274

#2740

#2741

#2742

#2743

#2745

#2746

#275

#2750

#2751

#2753

#2754

#2755

#2756

#2756

#2758

#276

#2760

#2761

#2762

#2764

#2765

#2766

#2767

#2768

#2769

#277

#2770

#2771

#2772

#2773

#2774

#2777

#2778

#2779

#278

#2782

#2784

#2785

#2786

#2788

#2789

#2790

#2791

#2792

#2793

#2795

#2796

#2797

#2798

#2799

#28

#2802

#2803

#2806

#2807

#2808

#2809

#2810

#2811

#2812

#2814

#2815

#2816

#2817

#2818

#282

#2824

#2825

#2826

#2827

#2829

#2831

#2833

#2834

#2837

#284

#2840

#2841

#2845

#2846

#2848

#2848

#2849

#285

#2850

#2852

#2855

#2856

#2858

#2859

#286

#2861

#2862

#2863

#2866

#2866

#287

#2870

#2874

#2874

#2878

#2882

#2883

#2884

#2885

#2886

#2891

#2892

#2893

#2898

#29

#2901

#2903

#2904

#2905

#2907

#2908

#2910

#2911

#2912

#2913

#2916

#2917

#2918

#2919

#292

#2921

#2922

#2924

#2928

#2929

#2931

#2932

#2933

#2935

#2936

#2937

#2938

#294

#2940

#2941

#2942

#2943

#2945

#2947

#2948

#2949

#2950

#2951

#2953

#2954

#2957

#2958

#2960

#2961

#2962

#2963

#2964

#2964

#2965

#2965

#2966

#2967

#2968

#297

#2970

#2971

#2972

#2975

#2976

#2976

#2977

#2979

#298

#2980

#2981

#2982

#2983

#2988

#299

#2990

#2991

#2992

#2993

#2995

#2996

#2998

#2999

#30

#3000

#3001

#3002

#3003

#3004

#3006

#3007

#3008

#3010

#3012

#3012

#3013

#3015

#3016

#3017

#3018

#302

#3021

#3022

#3024

#3027

#3028

#3029

#303

#3030

#3031

#3032

#3033

#3034

#3034

#3035

#3036

#3037

#3039

#304

#3040

#3041

#3042

#3044

#3045

#3046

#3047

#3048

#3049

#305

#3051

#3053

#3054

#3055

#3056

#3057

#3060

#3061

#3063

#3063

#3064

#3064

#3065

#3065

#3067

#3068

#3069

#3072

#3073

#3074

#3075

#3076

#3077

#3078

#3079

#3079

#308

#3080

#3081

#3083

#3084

#3085

#3086

#3089

#3091

#3092

#3093

#3094

#3095

#3096

#3098

#3099

#31

#310

#3100

#3101

#3103

#3106

#3107

#3108

#3109

#3111

#3111

#3112

#3113

#3116

#3117

#3118

#3120

#3121

#3122

#3124

#3125

#3126

#3127

#3128

#3129

#313

#3131

#3132

#3134

#3136

#3141

#3142

#3144

#3145

#3147

#3148

#3150

#3151

#3152

#3154

#3154

#3156

#3157

#3160

#3161

#3162

#3162

#3164

#3164

#3166

#3166

#3167

#317

#3170

#3171

#3172

#3174

#3176

#3179

#318

#3182

#3184

#3186

#3186

#3187

#3188

#3189

#3193

#3194

#3194

#3196

#3197

#3197

#32

#3200

#3201

#3201

#3202

#3204

#3206

#3207

#3210

#3210

#3211

#3217

#3218

#3220

#3221

#3222

#3223

#3224

#3229

#3230

#3231

#3231

#3232

#3234

#3235

#3236

#3237

#3238

#3239

#3240

#3242

#3244

#3245

#3246

#3249

#325

#3252

#3253

#3254

#3255

#3255

#3256

#3258

#3260

#3261

#3262

#3263

#3264

#3265

#3266

#3266

#3267

#3268

#3269

#327

#3270

#3271

#3273

#3274

#3276

#3276

#328

#3280

#3281

#3282

#3283

#3284

#3286

#3287

#3288

#3288

#329

#3291

#3291

#3292

#3292

#3297

#3298

#33

#3300

#3300

#3302

#3302

#3308

#3310

#3310

#3311

#3312

#3313

#3314

#3315

#3315

#3319

#3322

#3323

#3324

#3325

#3326

#3327

#334

#335

#336

#34

#340

#341

#343

#344

#348

#35

#351

#352

#353

#356

#357

#358

#359

#36

#360

#362

#363

#364

#367

#368

#37

#370

#373

#379

#384

#385

#388

#39

#393

#394

#395

#396

#4

#40

#400

#404

#406

#407

#41

#411

#412

#42

#434

#438

#44

#441

#443

#45

#453

#46

#462

#465

#47

#470

#472

#475

#477

#48

#480

#483

#485

#488

#49

#498

#5

#50

#501

#502

#51

#513

#514

#516

#519

#52

#520

#521

#522

#525

#529

#53

#534

#54

#543

#544

#545

#55

#550

#553

#557

#558

#56

#561

#562

#567

#57

#575

#578

#579

#58

#580

#581

#582

#583

#585

#586

#587

#588

#59

#590

#595

#596

#6

#60

#600

#605

#608

#609

#61

#611

#616

#617

#618

#619

#62

#621

#623

#624

#626

#63

#630

#633

#634

#635

#639

#64

#642

#643

#647

#648

#659

#66

#661

#664

#665

#666

#67

#670

#671

#678

#68

#684

#689

#698

#7

#70

#704

#708

#71

#712

#713

#715

#719

#72

#721

#723

#725

#727

#73

#733

#737

#738

#740

#741

#743

#745

#746

#748

#75

#750

#76

#761

#762

#767

#768

#770

#773

#783

#785

#789

#791

#793

#794

#795

#797

#798

#799

#8

#803

#805

#806

#809

#810

#812

#82

#820

#821

#822

#823

#829

#831

#836

#838

#84

#842

#848

#85

#851

#852

#853

#854

#858

#86

#860

#862

#867

#868

#87

#872

#88

#881

#884

#886

#889

#89

#892

#893

#898

#9

#90

#900

#901

#905

#906

#91

#910

#911

#918

#921

#93

#930

#932

#935

#94

#941

#947

#95

#950

#951

#953

#954

#957

#958

#96

#963

#964

#966

#968

#97

#971

#977

#981

#986

#989

#990

#993

#994

#999

v0.2.0

v0.2.1

v0.3.0

v0.3.1

v0.3.2

v0.4.0

v0.4.1

v0.4.2

v0.4.3

v0.5.0

v0.6.0

v0.7.0

v0.8.0

v0.8.1

v0.8.2

v0.9.0

v0.9.1

v0.9.2

v0.9.3

v0.9.4

v1.0.0

v1.0.1

v1.0.2

v1.0.3

v1.1.0

v1.1.1

v1.2.0

v1.3.0

v1.3.1

v1.3.2

v1.3.3

v1.3.4

v1.4.0

v1.4.1

v1.4.2

v1.4.3

v1.4.4

v1.4.5

v2.0.0

v2.0.1

v2.0.2

v2.0.3

v2.0.4

v2.1.0

v2.1.1

v2.2.0

v2.3.0

v2.3.1

v2.4.0

v2.4.1

v3.0.0

v3.0.1

v3.0.2

v3.1.0

v3.1.1

v3.2.0

v3.2.1

v3.2.2

v3.2.3

v3.3.0

v3.3.1

v3.3.2

v3.3.3

v3.3.4

v3.3.5

67d687609b cleanup Felix Marty 2023-07-12 16:16:58 +0000
67a46b7361 move exllama buffer init to the top level Felix Marty 2023-07-12 16:09:26 +0000
4462854e1b have a single gptq quantization type Felix Marty 2023-07-12 15:43:20 +0000
549df839d7 Tuple rather than list of exception types ssmi153 2023-07-12 23:26:09 +0800
29ff597ef9 Merge branch 'main' of https://github.com/ssmi153/text-generation-inference ssmi153 2023-07-12 23:13:38 +0800
dc761f148d GPTQ env vars: Catch Runtime errors ssmi153 2023-07-12 23:10:35 +0800
f2f0289fb9 feat(server): empty cache on errors OlivierDehaene 2023-07-12 17:05:50 +0200
073c1a884d

Merge branch 'huggingface:main' into main ssmi153 2023-07-12 23:06:19 +0800
67347950b7

feat(server): Implements sharding for non divisible vocab_size. (#583) Nicolas Patry 2023-07-12 16:43:31 +0200
f588d32ea4 feat(launcher): add arg validation and drop subprocess OlivierDehaene 2023-07-12 16:38:30 +0200
b3f830abc3 Reworking the quantization script so it's still universal (not llama specific) Nicolas Patry 2023-07-11 17:25:26 +0000
f764bc1b52 Fixing OOM on non sharded. Nicolas Patry 2023-07-12 12:46:02 +0000
bfa3920aec BNB 4bits. bnb4 Nicolas Patry 2023-07-12 12:42:43 +0000
2c4bf88268

fix(server): Bug fixes for GPTQ_BITS environment variable passthrough (#590) ssmi153 2023-07-12 20:17:35 +0800
636a4cca85 Bug fixes for GPTQ_BITS env var passthrough ssmi153 2023-07-12 17:25:24 +0800
6193512c4b

Update server/text_generation_server/utils/layers.py OlivierDehaene 2023-07-12 11:05:07 +0200
63f03b4b7d Just don't shard LMHead if not divisible. Nicolas Patry 2023-07-12 09:03:16 +0000
2e76727910 Doesn't affect LM_Head. Nicolas Patry 2023-07-11 12:51:13 +0000
906027ae58 Enabling non divisble vocab_size. Nicolas Patry 2023-07-11 12:37:25 +0000
7f9072228a

fix(server): Adding logger import to t5_modeling.py (#585) Adam Kowalski 2023-07-12 03:40:32 -0500
db4efbf4bc

fix(server): T5 weights names. (#582) enable_non_divisible_embeddings Nicolas Patry 2023-07-12 10:01:42 +0200
f063ebde10

chore: migrate ci region for more availability. (#581) Nicolas Patry 2023-07-12 10:01:01 +0200
5bd2ab6583

feat(server): Support for env value for GPTQ_BITS and GPTQ_GROUPSIZE. (#580) Nicolas Patry 2023-07-12 10:00:02 +0200
f0181436f4

fix(server): Fixing RW code (it's remote code so the Arch checking doesn't work to see which weights to keep). (#579) Nicolas Patry 2023-07-12 09:51:34 +0200
f5e8f73a1c

Update server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py Nicolas Patry 2023-07-12 08:38:17 +0200
a1c23f3823

Update layers.py Florian Zimmermeister 2023-07-11 18:47:50 +0200
64accc59f1

Update seq2seq_lm.py Florian Zimmermeister 2023-07-11 18:37:51 +0200
780198b9e4

Update santacoder.py Florian Zimmermeister 2023-07-11 18:37:09 +0200
377c01e21e

Update rw.py Florian Zimmermeister 2023-07-11 18:36:46 +0200
198e6179ef

Update causal_lm.py Florian Zimmermeister 2023-07-11 18:36:04 +0200
0e048e4347

Adding logger import to t5_modeling.py Adam Kowalski 2023-07-11 11:35:23 -0500
f2fae6db91

Update requirements.txt bnb version Florian Zimmermeister 2023-07-11 18:33:32 +0200
2a3f9cf5c2 Fix T5 weights names. Nicolas Patry 2023-07-11 12:06:01 +0000
5562b510b3 Update closing runner. Nicolas Patry 2023-07-11 10:34:47 +0000
3ef9d56847 migrate ci region for more availability (fingers crossed). Nicolas Patry 2023-07-11 10:12:24 +0000
1b7b91a4d3 Support for env value for GPTQ_BITS and GPTQ_GROUPSIZE. Nicolas Patry 2023-07-11 10:33:29 +0200
d9ed7b9274 Fixing RW code (it's remote code so the Arch checking doesn't work to see which weights to keep). Nicolas Patry 2023-07-10 18:40:09 +0000
1e62237d44 Adding additional response header X-Total-Tokens Julian Bright 2023-07-11 03:17:09 +1000
b4024edd45

feat: better errors for warmup and TP (#575) OlivierDehaene 2023-07-10 14:47:15 +0200
9d60030ba0 feat: better errors for warmup and TP OlivierDehaene 2023-07-10 12:43:44 +0200
20ca9cf0c3 Memory fragmentation added for Causal LM ankit201 2023-07-09 03:35:47 +0000
15de7c7ac3 DockerFile change Ankit Singh 2023-07-01 13:37:43 +0000
5cdd242fec

Update client.py : Adding missing arg "best_of" in generate_stream function yash bhaskar 2023-07-07 22:06:09 +0530
e943a294bc

fix(server): harden the weights choice to save on disk. (#561) Nicolas Patry 2023-07-07 14:50:12 +0200
193eae246c Update test. Nicolas Patry 2023-07-06 22:02:23 +0000
aae9d6faf7 Attempting to harden a bit the weights choice to save on disk. Nicolas Patry 2023-07-06 21:36:00 +0000
31b36cca21

v0.9.1 (#558) v0.9.1 OlivierDehaene 2023-07-06 16:05:42 +0200
f1f7674ae9 v0.9.1 OlivierDehaene 2023-07-06 16:03:53 +0200
c4bb5264ac

fix(server): decrease memory fragmentation (#557) OlivierDehaene 2023-07-06 14:28:33 +0200
39e37ec624 fix(server): decrease memory fragmentation OlivierDehaene 2023-07-06 13:07:08 +0200
a6e387404d try-catch to load the cuda extension, quite ugly practice tbh Felix Marty 2023-07-05 17:53:56 +0000
620ed7d8aa Merge branch 'gptq-cuda-kernels' of https://github.com/fxmarty/text-generation-inference into gptq-cuda-kernels Felix Marty 2023-07-05 16:42:37 +0000
2272b3a456 some more cleanup Felix Marty 2023-07-05 16:42:13 +0000
0ff8219fdb Merge branch 'main' into gptq-cuda-kernels Félix Marty 2023-07-06 01:31:05 +0900
6f42942772

feat(router): add argument for hostname in router (#545) (#550) OlivierDehaene 2023-07-05 18:28:45 +0200
c858d791e5 add attribution Felix Marty 2023-07-05 16:15:10 +0000
ee7ba48b9a add exllama gptq kernel Felix Marty 2023-07-05 15:43:42 +0000
22fc605f4e add hostname to launcher OlivierDehaene 2023-07-05 09:39:00 +0200
57886c8fc4

feat(router): add argument for hostname in router (#545) Phil Chen 2023-07-05 09:35:28 +0200
0a468fdf7d Add argument for hostname in router Phil Chen 2023-07-05 00:49:53 +0200
31e2253ae7

feat(server): use latest flash attention commit (#543) OlivierDehaene 2023-07-04 20:23:55 +0200
e4b26aa10b

fix(server): avoid errors for very small top_p values (#544) Nick Hill 2023-07-04 11:11:33 -0700
8a7bfcd571 fix(server): avoid errors for very small top_p values Nick Hill 2023-07-04 10:59:40 -0700
ab860d371a feat(server): use latest flash attention commit OlivierDehaene 2023-07-04 19:33:49 +0200
2a101207d4

fix(server): Handle loading from local files for MPT (#534) Antoni Baum 2023-07-04 09:37:25 -0700
e6888d0e87

docs(benchmarker): Adding some help for the options in text-generation-benchmark. (#462) Nicolas Patry 2023-07-04 18:35:37 +0200
742199aa0d Modified fix. Nicolas Patry 2023-07-04 11:30:59 +0200
81f234ec61 Revert "Map deduplicated tensors via metadata" Nicolas Patry 2023-07-04 11:30:35 +0200
8405581fcd

fix: Update server/Makefile to include Makefile-vllm (#520) Antoni Baum 2023-07-04 00:39:25 -0700
5c490fb56a

Handle loading from local files for MPT Antoni Baum 2023-07-03 12:19:54 -0700
1da07e85aa

feat(server): Add Non flash MPT. (#514) Nicolas Patry 2023-07-03 13:01:46 +0200
2c30ff567e Remove comment. Nicolas Patry 2023-07-03 08:43:02 +0000
ed0c5bd1ed Removing commented things (raising proper errors instead). Nicolas Patry 2023-07-03 08:42:26 +0000
b591527a6c Einops. Nicolas Patry 2023-07-01 19:35:26 +0000
e28a809004

v0.9.0 (#525) v0.9.0 OlivierDehaene 2023-07-01 19:25:41 +0200
da9c4655c3 fix launcher OlivierDehaene 2023-07-01 18:44:43 +0200
5654537065 v0.9.0 OlivierDehaene 2023-07-01 17:50:03 +0200
44561927e0 Adding integration tests snapshots. Nicolas Patry 2023-07-01 10:30:09 +0000
24c0f1cc7a Adding (failing) integration tests. Nicolas Patry 2023-06-30 21:55:37 +0000
c62527a542 Fixed MPT sharding. Nicolas Patry 2023-06-30 21:46:44 +0000
f33ad7ed98 Non flash MPT. Nicolas Patry 2023-06-30 09:52:49 +0000
2b53d71991

fix(launcher): fix issue where launcher does not properly report shard failures (#522) OlivierDehaene 2023-06-30 23:09:20 +0200
5ec19ef951 fix(launcher): fix issue where launcher does not properly report shard failures OlivierDehaene 2023-06-30 21:54:20 +0200
51f2735f6c

Ensure classmethods use cls instead of the class directly Antoni Baum 2023-06-30 11:47:42 -0700
4656414977

Update server/Makefile to include Makefile-vllm Antoni Baum 2023-06-30 11:43:45 -0700
ecf6dc3a5a

feat: Add the option to force another dtype than f16. (#513) Nicolas Patry 2023-06-30 20:30:09 +0200
3b0c979efc

feat(router): arg validation (#519) OlivierDehaene 2023-06-30 20:07:49 +0200
ee5463a431 feat(router): arg validation OlivierDehaene 2023-06-30 19:45:35 +0200
e74bd41e0f

feat(server): add paged attention to flash models (#516) OlivierDehaene 2023-06-30 19:09:59 +0200
b1831d5f97 double free OlivierDehaene 2023-06-30 17:43:40 +0200
3c4243d627 fix drop OlivierDehaene 2023-06-30 16:52:25 +0200
8ec0edcfe3 fix OlivierDehaene 2023-06-30 16:47:00 +0200
c52e84fe10 small refactor OlivierDehaene 2023-06-30 16:32:23 +0200
c5da6579dc flash neox is flaky OlivierDehaene 2023-06-30 14:06:44 +0200
8a41ac8bb9 remove debug logging OlivierDehaene 2023-06-30 13:23:50 +0200
16f796f735 add falcon, santacoder and neox support OlivierDehaene 2023-06-30 13:19:44 +0200
02e43ccf6f FInal touches. Nicolas Patry 2023-06-30 08:39:15 +0000
59474c29aa Fix cli name. Nicolas Patry 2023-06-30 08:08:13 +0000
89e4015844 non modeling. Nicolas Patry 2023-06-30 07:52:36 +0000
0a50ac31a7 Remove mpt. Nicolas Patry 2023-06-30 07:52:01 +0000