Commit Graph

  • 93e7ba54c0 fix tests OlivierDehaene 2024-04-10 17:20:07 +0200
  • 07a3050b20 fixed OlivierDehaene 2024-04-10 16:47:41 +0200
  • ae6215fcea Enable server UT: test_causal_lm.py::test_batch_from_pb (#121) Jacek Czaja 2024-04-10 16:33:56 +0200
  • 2e7f6e8012 freaking rotary OlivierDehaene 2024-04-10 15:18:51 +0200
  • 424e1b41a2 update vllm version OlivierDehaene 2024-04-10 11:20:53 +0200
  • 87505bf28a fix OlivierDehaene 2024-04-10 08:45:56 +0200
  • 4634b00c2a Adding Llava-Next (Llava 1.6) with full support. (#1709) Nicolas Patry 2024-04-09 21:32:00 +0200
  • f4f1e206db remove imports OlivierDehaene 2024-04-09 19:32:26 +0200
  • 26da6bfb2d fix mistral OlivierDehaene 2024-04-09 19:31:16 +0200
  • d4da0d4d97 use custom vllm with kv_head_mapping OlivierDehaene 2024-04-09 19:04:44 +0200
  • 0604c5cb83 add py-cpuinfo OlivierDehaene 2024-04-08 14:56:51 +0200
  • 946bf44242 fix cohere OlivierDehaene 2024-04-05 18:42:33 +0200
  • 91d76a65f5 remove log_level from python shard OlivierDehaene 2024-04-05 17:29:38 +0200
  • 0c88cb6327 remove log_level from python shard OlivierDehaene 2024-04-05 16:50:42 +0200
  • d7497f55cf update dockerfile OlivierDehaene 2024-04-05 15:36:27 +0200
  • 847df6099a update dockerfile OlivierDehaene 2024-04-05 11:41:56 +0200
  • 58a7719e02 fix OlivierDehaene 2024-04-04 19:11:50 +0200
  • 4a02d3505f add contiguous OlivierDehaene 2024-04-04 18:48:58 +0200
  • 5088005908 fix: fix CohereForAI/c4ai-command-r-plus OlivierDehaene 2024-04-04 18:46:51 +0200
  • 4217ddb842 Move import up. Nicolas Patry 2024-04-09 17:19:26 +0000
  • 8c114e5fc4 Fixing select_best_resolution. Nicolas Patry 2024-04-09 17:16:15 +0000
  • 61821f410a Update mt0 (not more truncating). Nicolas Patry 2024-04-09 13:11:25 +0000
  • 30cc78773e Skip server tests of not enabled models (#125) Karol Damaszke 2024-04-09 14:15:41 +0200
  • 2283562bfc Created all the logic server side (with image download on the fly too). Nicolas Patry 2024-04-09 11:26:30 +0000
  • c6739526c6 Fix test_watermark (#124) Karol Damaszke 2024-04-09 11:29:21 +0200
  • 106d8ee818 Automatic quantization config. (#1719) Nicolas Patry 2024-04-09 10:27:57 +0200
  • 757c12dbac Fix test_pass_through_tokenizer (#117) Sylwester Fraczek 2024-04-09 09:30:47 +0200
  • fd536f2017 Automatic quantization config. Nicolas Patry 2024-04-09 05:40:52 +0000
  • 215030ac88 Tmp dump (sending real image for real memory offset to be computed. Nicolas Patry 2024-04-09 05:15:09 +0000
  • d0bc603fe6 feat: explore compiled MLP bench op-compilation-benchmarking drbh 2024-04-09 02:36:09 +0000
  • 2762e6883e fix: include fsm_grammar_states in FlashMistralBatch from_pb fix-grammar-fsm-batching drbh 2024-04-08 17:23:46 +0000
  • d957e32601 Add Habana copyright header (#122) Karol Damaszke 2024-04-08 18:06:21 +0200
  • 204d2d8a2f docker image: text-generation-launcher wrapper as entrypoint Raphael Glon 2024-04-08 16:48:22 +0200
  • 274b68ad7d More GPUs for more VRAM. Nicolas Patry 2024-04-08 14:52:34 +0000
  • b65beb43d3 Revert "Regenerate ld.so.cache (#1708)" Raphael Glon 2024-04-08 16:45:34 +0200
  • 635701ca29 feat: add async context manager for AsyncClient Sabidao 2024-04-08 17:16:19 +0300
  • a7ac9877c2 Force the actual upgrade. Nicolas Patry 2024-04-08 14:13:26 +0000
  • 39620ce29f Fixed load test. Bad sanitation on the router meant CUDA OOM. Nicolas Patry 2024-04-08 14:08:02 +0000
  • ff42d33e99 Revert license to Apache 2.0 (#1714) OlivierDehaene 2024-04-08 15:06:16 +0200
  • 314f1363a4 Empty commit Julien Chaumond 2024-04-08 15:01:01 +0200
  • b00cdc5140 Revert "chore: update license to HFOIL (#725)" OlivierDehaene 2024-04-08 14:59:16 +0200
  • 99771cfad5 Upgrade tests (still missing load tests for some reason). Nicolas Patry 2024-04-08 09:56:37 +0000
  • 0bd7ef5d7f Update libraries abhishek thakur 2024-04-08 11:39:34 +0200
  • 53c2c3dbc7 Regenerate ld.so.cache (#1708) oOraph 2024-04-08 08:52:10 +0200
  • ccbfc05db5 Fixing integration tests ? (Failures locally). Nicolas Patry 2024-04-05 18:06:04 +0000
  • 8dca3b04f8 Force weights_only (before fully breaking pickle files anyway). (#1710) Nicolas Patry 2024-04-05 19:23:57 +0200
  • 7852a85b57 Adding docs. Nicolas Patry 2024-04-05 16:06:01 +0000
  • 6c350f2f75 Working for TP, Llama + Mistral Nicolas Patry 2024-04-05 15:27:29 +0000
  • df4c700828 Tmp dump (running on images hardcoded size.) Nicolas Patry 2024-04-04 21:42:57 +0000
  • 5f4b395480 More work on the CLIP Side. Nicolas Patry 2024-04-04 18:08:38 +0000
  • b8be0d1ae7 Update by abstracting away text model. Nicolas Patry 2024-04-03 16:41:01 +0000
  • b68fc4deb1 Llava next dump. Nicolas Patry 2024-04-02 09:40:27 +0000
  • 422f23be74 Force weights_only (before fully breaking pickle files anyway). Nicolas Patry 2024-04-05 16:17:16 +0000
  • 4fb19f25be Regenerate ld.so.cache Raphael Glon 2024-04-05 17:56:50 +0200
  • f9958ee191 Fixing cohere tokenizer. (#1697) Nicolas Patry 2024-04-05 16:44:19 +0200
  • 5062fda4ff Push users to streaming in the readme. (#1698) Nicolas Patry 2024-04-05 16:44:10 +0200
  • c7e570e59d Pickle conversion now requires --trust-remote-code. (#1704) Nicolas Patry 2024-04-05 13:32:53 +0200
  • b0f460a74c Make warning visible in the logs. Nicolas Patry 2024-04-05 11:31:51 +0000
  • e2c870c216 Dummy modification. Nicolas Patry 2024-04-05 08:32:25 +0000
  • 96846f633a Soft deprecation with clear text explaining the rationale. Nicolas Patry 2024-04-05 08:21:23 +0000
  • 99874eae74 Add cuda graphs sizes and make it default. (#1703) Nicolas Patry 2024-04-04 23:01:56 +0200
  • ac118a5ad0 Pickle conversion now requires --trust-remote-code. Nicolas Patry 2024-04-04 13:16:32 +0000
  • d67633a0c8 Fix disabling. Nicolas Patry 2024-04-04 13:01:27 +0000
  • 6951962ffd Clarify disabling. Nicolas Patry 2024-04-04 12:59:29 +0000
  • edcbc0890c Move to cuda graphs by default (with possibility to choose graph sizes). Nicolas Patry 2024-04-04 12:46:28 +0000
  • 06227f7b5e Fix router tests (#119) Karol Damaszke 2024-04-04 11:10:11 +0200
  • e210e15e27 Update Cargo.lock file (#118) Karol Damaszke 2024-04-03 17:55:54 +0200
  • 638685ea94 Push users to streaming in the readme. Nicolas Patry 2024-04-02 19:27:17 +0000
  • 9b86418e21 Fixing cohere tokenizer. Nicolas Patry 2024-04-02 19:25:01 +0000
  • b0de25a285 Don't set rope_scaling for unsupported models (#115) Karol Damaszke 2024-04-02 12:12:02 +0200
  • 3e28d7aa42 Align the default value with server's (#111) yuanwu2017 2024-04-01 18:44:20 +0800
  • 4ee0a0c401 v1.4.5 (#1686) v1.4.5 OlivierDehaene 2024-03-29 19:17:24 +0100
  • 93fd4fd2fe v1.4.5 OlivierDehaene 2024-03-29 19:07:35 +0100
  • f04255c694 feat: Add dbrx support (#1685) OlivierDehaene 2024-03-29 18:49:36 +0100
  • 275a61aae6 use GPT2TokenizerFast by default OlivierDehaene 2024-03-29 18:46:28 +0100
  • dcfefc425a feat(models): Add DBRX OlivierDehaene 2024-03-29 18:41:35 +0100
  • 7342baa2eb Add support for rope_scaling and remove is_optimized_for_gaudi (#112) Karol Damaszke 2024-03-29 15:07:32 +0100
  • 2c83d09d3b wip OlivierDehaene 2024-03-28 18:28:09 +0100
  • 762dbf3f19 fix: handle batches with and without grammars (#1676) drbh 2024-03-28 12:02:01 -0400
  • 818aee37e5 fix: adjust logprob response logic (#1682) drbh 2024-03-28 12:01:46 -0400
  • 01ebb77d12 fix: adjust logprob response logic drbh 2024-03-28 00:00:55 +0000
  • bf5263b88b Disable watermark with FP8 quantization (#114) Karol Damaszke 2024-03-27 13:32:20 +0100
  • 56f00a552b Adjust warmup to all possible bucket sizes and decode batch size = 1 (#113) jkaniecki 2024-03-27 11:59:51 +0100
  • 9796b0e10d Add simple continuous batching benchmark (#108) Karol Damaszke 2024-03-26 09:17:55 +0100
  • 0cd04fe4f7 fix: handle batches with and without grammars drbh 2024-03-25 23:18:50 +0000
  • 7f58680999 Add docker pull command in README (#110) regisss 2024-03-25 15:44:54 +0100
  • 2b1581edac Warmup greedy search in next token chooser (#109) jkaniecki 2024-03-22 23:43:20 +0100
  • 6c4496a1a3 v1.4.4 (#1668) v1.4.4 OlivierDehaene 2024-03-22 18:44:05 +0100
  • 57915957ab v1.4.4 OlivierDehaene 2024-03-22 18:05:21 +0100
  • 1e9bcd9dd8 feat: cohere (#1660) OlivierDehaene 2024-03-22 17:59:25 +0100
  • f39cb899d9 remove torch from requirements OlivierDehaene 2024-03-22 17:21:34 +0100
  • f171bdc823 Inline images for multimodal models. (#1666) Nicolas Patry 2024-03-22 17:14:54 +0100
  • 66914f7b19 fix: LlamaTokenizerFast to AutoTokenizer at flash_mistral.py (#1637) SeongBeomLEE 2024-03-23 01:13:13 +0900
  • 08e9181418 feat: update client to 0.7 (#1667) OlivierDehaene 2024-03-22 17:10:56 +0100
  • bd73076761 feat: update client to 0.7 OlivierDehaene 2024-03-22 17:08:58 +0100
  • b775027422 update requirements OlivierDehaene 2024-03-22 16:55:35 +0100
  • cfc89bb396 faster OlivierDehaene 2024-03-21 09:49:58 +0100
  • 56296cc43c feat: add cohere OlivierDehaene 2024-03-14 11:21:06 +0000
  • e2f9856a88 Inline images for multimodal models. Nicolas Patry 2024-03-22 09:01:49 +0000
  • deb440b3a2 Repair idefics integration tests. (#1663) Nicolas Patry 2024-03-21 22:21:03 +0100