Commit Graph

  • 900ac49454 Fixing GTPQ device santacoder. Nicolas Patry 2023-07-20 19:08:33 +0000
  • 7faef69015 Give escape hatch to not use exllama kernels even if available. Nicolas Patry 2023-07-20 17:47:09 +0000
  • 8cf7c89910 Small polish. Nicolas Patry 2023-07-20 17:44:37 +0000
  • 0860394489 Refactored a bit. Nicolas Patry 2023-07-20 17:38:50 +0000
  • f555dabca8 Putting back header inclusion (seems unused but still) simpler_exllama Nicolas Patry 2023-07-20 15:46:51 +0000
  • 5ca0508d02 Simpler exllama Nicolas Patry 2023-07-20 15:36:53 +0000
  • bf94df3c71 fix(server): use mem_get_info to get kv cache size (#664) OlivierDehaene 2023-07-20 17:23:49 +0200
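The `mem_get_info` change above sizes the KV cache from the GPU's actually free memory rather than an estimate. A minimal pure-Python sketch of that sizing logic (the function name, model dimensions, and `safety` margin are illustrative assumptions, not values from the commit; in the server, `free_bytes` would come from `torch.cuda.mem_get_info()[0]`):

```python
def num_kv_cache_blocks(free_bytes: int, num_layers: int, num_heads: int,
                        head_dim: int, block_size: int,
                        dtype_bytes: int = 2, safety: float = 0.9) -> int:
    """How many paged KV-cache blocks fit in the reported free memory.

    One block holds keys *and* values (hence the factor 2) for
    `block_size` tokens across every layer and head.
    """
    bytes_per_block = 2 * num_layers * num_heads * head_dim * block_size * dtype_bytes
    return int(free_bytes * safety) // bytes_per_block

# Illustrative numbers: 8 GiB free, LLaMA-7B-like dimensions, fp16 cache.
blocks = num_kv_cache_blocks(8 * 2**30, num_layers=32, num_heads=32,
                             head_dim=128, block_size=16)
print(blocks)  # 921
```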
  • 08b8eec1d7 fix(server): Fixing non parameters in quantize script bigcode/starcoder was an example. (#661) Nicolas Patry 2023-07-20 16:04:15 +0200
  • 3db5d4a654 fix(server): use mem_get_info to get kv cache size OlivierDehaene 2023-07-20 16:00:15 +0200
  • 362883f259 fix(server): llama v2 GPTQ (#648) fxmarty 2023-07-20 15:02:54 +0200
  • 214c06f510 Add trust_remote_code to quantize script (#647) cdawg 2023-07-20 13:53:08 +0200
  • 929e374753 Fixing quantize script on models with non parameters buffers. Nicolas Patry 2023-07-20 11:16:34 +0000
  • a1859012c4 Merge branch 'main' into patch-2 Dong Shin 2023-07-20 11:37:46 +0900
  • 88d753d79b Merge branch 'huggingface:main' into bnb-4bit krzim 2023-07-19 18:11:41 -0400
  • c52a5d4456 add documentation for 4bit quantization options krzim 2023-07-19 22:10:34 +0000
  • 6bf7090ecd fix per-column quantization Felix Marty 2023-07-19 17:55:41 +0000
  • 2080735e16 taste Felix Marty 2023-07-19 16:35:23 +0000
  • 5882768682 nit Felix Marty 2023-07-19 16:31:46 +0000
  • 2b3f65048d line break cdawg 2023-07-19 17:50:46 +0200
  • d6649411c4 Update quantize.py cdawg 2023-07-19 17:43:49 +0200
  • edfbfdfb3f Merge branch 'main' into gptq-cuda-kernels Félix Marty 2023-07-19 16:58:54 +0200
  • 5a1512c025 docs: Update README.md (#643) Nicolas Patry 2023-07-19 13:39:12 +0200
  • 1c81df15cd docs: Update README.md (#639) Nicolas Patry 2023-07-19 13:38:52 +0200
  • fd851a60be Update README.md Nicolas Patry 2023-07-19 12:09:43 +0200
  • b66b190403 feat(router): ngrok edge (#642) OlivierDehaene 2023-07-19 11:59:58 +0200
  • 42f85addaa feat(router): ngrok edge OlivierDehaene 2023-07-19 11:59:21 +0200
  • df61543e0d Update README.md Nicolas Patry 2023-07-19 10:55:04 +0200
  • fe80f5360c feat(server): auto max_batch_total_tokens for flash att models (#630) OlivierDehaene 2023-07-19 09:31:25 +0200
  • 2934543a59 0.98 OlivierDehaene 2023-07-19 02:06:16 +0200
  • 406b094002 0.985 OlivierDehaene 2023-07-19 01:50:19 +0200
  • 0a02801822 try 0.99 OlivierDehaene 2023-07-19 01:26:42 +0200
  • 7f399cd848 revert OlivierDehaene 2023-07-19 01:15:59 +0200
  • 8793ae5890 add clear cache when batch is finished OlivierDehaene 2023-07-19 01:12:28 +0200
  • 0111869ad0 use less memory OlivierDehaene 2023-07-19 00:42:15 +0200
  • 05d2a77e4c reset peak memory OlivierDehaene 2023-07-19 00:17:49 +0200
  • 99568eef7b add tmate OlivierDehaene 2023-07-18 19:43:48 +0200
  • 45d24bea52 sleep to connect to the CI runner OlivierDehaene 2023-07-18 19:29:14 +0200
  • 5e6ddfd6a4 fix(server): fix llamav2 config (#635) v0.9.3 OlivierDehaene 2023-07-18 18:49:42 +0200
  • 4409bcf893 fix(server): fix llamav2 config OlivierDehaene 2023-07-18 18:46:38 +0200
  • cf83f9b66f v0.9.3 (#634) OlivierDehaene 2023-07-18 18:11:20 +0200
  • 7288cb8640 v0.9.3 OlivierDehaene 2023-07-18 18:11:00 +0200
  • 211b211ec0 feat(server): add support for llamav2 (#633) Nicolas Patry 2023-07-18 18:09:53 +0200
  • 36a9bddde4 use max_memory_reserved OlivierDehaene 2023-07-18 18:06:46 +0200
  • 7a60f4d8c3 Llamav2 Post flashv2 Nicolas Patry 2023-07-18 16:55:58 +0200
  • 1686a7c0dc add syncs OlivierDehaene 2023-07-18 17:03:29 +0200
  • 160a50af77 cleanup OlivierDehaene 2023-07-18 16:18:56 +0200
  • de892fb434 revert back to normal allocator OlivierDehaene 2023-07-18 16:11:18 +0200
  • 79616a8796 add block size parameter OlivierDehaene 2023-07-18 12:45:51 +0200
  • d2e3843588 pad to block size OlivierDehaene 2023-07-18 12:04:38 +0200
  • 086d0c2252 update logs OlivierDehaene 2023-07-18 11:43:11 +0200
  • a6b128b293 fix default value OlivierDehaene 2023-07-18 11:41:10 +0200
  • 4201a8be46 fix default value OlivierDehaene 2023-07-18 11:39:14 +0200
  • b165f8b7b7 feat(server): auto max_batch_total_tokens for flash att models OlivierDehaene 2023-07-18 11:33:49 +0200
  • 3b71c38558 feat(server): flash attention v2 (#624) OlivierDehaene 2023-07-18 16:21:18 +0200
  • 751f26b66c fix dockerfile OlivierDehaene 2023-07-18 15:29:02 +0200
  • d186b13c59 fix OlivierDehaene 2023-07-18 12:36:27 +0200
  • 4d38a1c4ad feat(server): Reworking the quantization script so it's still universal (not llama specific) (#587) Nicolas Patry 2023-07-18 12:19:05 +0200
  • bc2f351980 abstraction above flash OlivierDehaene 2023-07-18 10:32:10 +0200
  • f400f2d58b use native grouped attention OlivierDehaene 2023-07-18 09:21:22 +0200
  • 4e0d8b2efb export requirements with bnb krzim 2023-07-17 21:10:01 +0000
  • 432ab71be9 add 4bit bnb quantization krzim 2023-07-17 20:23:23 +0000
  • 8ff7d57443 add AutoModel error message for 4bit quantization krzim 2023-07-17 19:31:39 +0000
  • 9c11372d8f add bnb 4bit to quantization enums krzim 2023-07-17 19:31:11 +0000
  • aded1c161e update bnb requirements krzim 2023-07-17 19:29:05 +0000
  • 44acf72a73 fea(launcher): debug logs (#623) OlivierDehaene 2023-07-17 19:03:07 +0200
  • bc2873246c fix(launcher): Rename b-float16 to bfloat16 in the launcher arg (#621) Nicolas Patry 2023-07-17 18:38:16 +0200
  • e856983781 fea(launcher): debug logs OlivierDehaene 2023-07-17 18:37:52 +0200
  • 2d4b31070e fix OlivierDehaene 2023-07-17 17:39:45 +0200
  • 107fcfe9b6 feat(server): flash attention v2 OlivierDehaene 2023-07-17 17:34:55 +0200
  • 5a68b3f751 Rename b-float16 to bfloat16 in the launcher arg (just more usual). Nicolas Patry 2023-07-17 14:34:58 +0200
  • b4ce728b4f Fix env vars Ian 2023-07-17 04:36:04 +0000
  • 0ec4d8182f Update conditionals for dynamic scaling Ian 2023-07-17 01:17:02 +0000
  • f01c11bd0c Implement scaled and dynamically scaled RoPE Ian 2023-07-01 02:18:03 +0000
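The commit above adds the two common RoPE context-extension variants. A minimal pure-Python sketch of the underlying formulas (following the conventions popularized in HF transformers' RoPE scaling; the function names are illustrative, not the commit's API):

```python
def rope_inv_freq(dim: int, base: float = 10000.0) -> list:
    # One inverse frequency per pair of channels, as in standard RoPE.
    return [base ** (-2 * i / dim) for i in range(dim // 2)]

def linear_scaled_positions(positions, factor: float) -> list:
    # "Scaled" RoPE (position interpolation): compress positions by `factor`
    # so a longer sequence maps back into the trained position range.
    return [p / factor for p in positions]

def dynamic_ntk_base(base: float, dim: int, seq_len: int,
                     max_trained: int, factor: float) -> float:
    # "Dynamically scaled" (NTK-aware) RoPE: keep the base unchanged inside
    # the trained context, grow it as the sequence length exceeds it.
    if seq_len <= max_trained:
        return base
    return base * ((factor * seq_len / max_trained) - (factor - 1)) ** (dim / (dim - 2))
```

Linear scaling trades resolution at all positions, while the dynamic variant only perturbs frequencies once generation runs past the trained context.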
  • abe4e4b1cc fix: LlamaTokenizerFast to AutoTokenizer at flash_llama.py Dong Shin 2023-07-16 21:09:02 +0900
  • a2cf1bdb2f fix(server): empty_cache when stopped OlivierDehaene 2023-07-15 13:57:31 +0200
  • 3e5165c3ed Directly load GPTBigCode to specified device Yang, Bo 2023-07-15 00:32:46 -0700
  • c58a0c185b v0.9.2 (#616) v0.9.2 OlivierDehaene 2023-07-14 16:31:48 +0200
  • 743a8ea9c9 v0.9.2 OlivierDehaene 2023-07-14 15:39:04 +0200
  • 152085461d Merge pull request #1 from bbc/fix-type-hint Matt Haynes 2023-07-14 12:06:04 +0100
  • 5161d4148e Change type hint for backward compatibility with python <3.9 Ciarán Byrne 2023-07-14 12:00:59 +0100
  • 51e3f84453 Remove unused cert from async client Ciarán Byrne 2023-07-14 12:00:16 +0100
  • 5b9de4a1d3 fix(server): blacklist local files (#609) OlivierDehaene 2023-07-13 21:54:55 +0200
  • c8b077be79 docs: README: Add logo + baseline (#611) Victor Muštar 2023-07-13 21:45:20 +0200
  • 7f18519806 move image header to top Victor Muštar 2023-07-13 20:59:43 +0200
  • abb02d6556 Add logo + baseline Victor Muštar 2023-07-13 20:53:15 +0200
  • 982ce3227b feat(router): explicit warning if revision is not set (#608) OlivierDehaene 2023-07-13 18:59:38 +0200
  • 17aefa4c76 fix(server): blacklist local files OlivierDehaene 2023-07-13 18:55:58 +0200
  • e6b4bfac02 feat(router): explicit warning if revision is not set OlivierDehaene 2023-07-13 18:49:31 +0200
  • 74e6d6e54e fix the usual merge mess Felix Marty 2023-07-13 15:48:55 +0000
  • 9401e10210 Merge branch 'main' into gptq-cuda-kernels Félix Marty 2023-07-13 17:45:52 +0200
  • 0036084294 support all, test llama Felix Marty 2023-07-13 15:41:57 +0000
  • ae6256a17a Add option cert param to client Matt Haynes 2023-07-13 14:23:28 +0100
  • b7327205a6 feat(launcher): add arg validation and drop subprocess (#595) OlivierDehaene 2023-07-13 14:22:37 +0200
  • 2ae65b45a8 fix tests Felix Marty 2023-07-13 10:38:08 +0000
  • 82a7f9eb53 Convert example docker command to use :latest rather than being pegged to 0.9 bealbrown 2023-07-12 23:12:05 -0400
  • 38c2be5926 fix test Felix Marty 2023-07-12 18:31:49 +0000
  • 3628559516 GPTQ Env vars: catch correct type of error (#596) ssmi153 2023-07-13 01:57:46 +0800
  • faa5b52fdc Merge branch 'main' into gptq-cuda-kernels Félix Marty 2023-07-12 18:47:30 +0200
  • 8645fd39e1 tests Felix Marty 2023-07-12 16:42:34 +0000
  • f90c61a340 support bits different than 4 Felix Marty 2023-07-12 16:19:25 +0000