Commit Graph

  • 9a79c2f867 feat: support logprobs in streaming and non streaming chat drbh 2024-01-09 14:04:31 -0500
  • 65c913b55d feat: support FinishReason in streaming and non streaming chat drbh 2024-01-09 13:47:54 -0500
  • 8c4ab53780 feat: support repetition_penalty and improve non stream response drbh 2024-01-09 13:31:15 -0500
  • fba1953eb6 fix: add prompt_token_count to InferResponse for chat completions drbh 2024-01-09 13:04:29 -0500
  • adad67e3d0 fix: prefer apply_chat_template logic in HubTokenizerConfig struct drbh 2024-01-09 12:27:01 -0500
  • 65db02f192 fix: use TORCH_NCCL_AVOID_RECORD_STREAMS=1 fix/avoid_record_streams OlivierDehaene 2024-01-09 17:59:16 +0100
  • 446b3b6af7 fix: prefer index on StreamResponse drbh 2024-01-09 11:59:11 -0500
  • f82ff3f64a fix: adds index, model id, system fingerprint and updates do_sample param drbh 2024-01-09 11:54:20 -0500
  • 91d7267534 Fix missing make target platform for local install: 'install-flash-attention-v2' (#1414) R. P. Ruiz 2024-01-09 10:19:31 -0500
  • 55605a1247 remove log OlivierDehaene 2024-01-09 15:35:34 +0100
  • 3d082ccba4 fix: follow base model for tokenizer in router OlivierDehaene 2024-01-09 15:28:05 +0100
  • 564f2a3b75 fix: fix local loading for .bin models (#1419) OlivierDehaene 2024-01-09 15:21:00 +0100
  • 3f9b3f4539 docs: update required CUDA version to 12.2 OlivierDehaene 2024-01-09 14:28:55 +0100
  • 7ffe9023da Fix local load for Medusa PYNing 2024-01-09 19:42:03 +0800
  • 84aa6ff5aa fix: fix local loading for .bin models OlivierDehaene 2024-01-09 11:48:57 +0100
  • ddf7412a6b fix: remove ChatTemplateError and add index to stream messages drbh 2024-01-08 08:52:01 -0500
  • 716fe00d92 Fix missing make target: https://github.com/huggingface/text-generation-inference/issues/1397 deepily 2024-01-05 17:29:35 -0500
  • 3ae9cd655d feat: supports openai chat completions API drbh 2024-01-05 15:33:42 -0500
  • 2358a35485 feat: add mocked http request tests drbh 2024-01-03 16:13:50 -0500
  • 252ccde104 Control prefill and decode batch size separately (#6) Karol Damaszke 2024-01-02 18:21:01 +0100
  • 9ce069445e removing personal git workflow Łukasz Olszewski 2024-01-02 17:55:46 +0100
  • c46dd7e78b restoring original README Łukasz Olszewski 2024-01-02 17:53:54 +0100
  • ad7f839673 fix vllm import error Zeyu Li 2023-12-30 14:26:37 +0800
  • 74d9dfa89e Fix incorrect use of bias in awq chenxichen 2023-12-27 03:25:47 +0000
  • 76590818a3 fixing ram exhaustion during build issue Łukasz Olszewski 2023-12-23 13:19:25 +0100
  • 1be2d9a8ec Batch size bucketing (#5) Karol Damaszke 2023-12-22 21:53:01 +0100
  • 43277c6c6a fixing requirements Łukasz Olszewski 2023-12-22 16:00:28 +0100
  • ca49490e07 Update docker-image.yml Lukasz Olszewski 2023-12-22 15:56:27 +0100
  • 91dfb2272a Create docker-image.yml Lukasz Olszewski 2023-12-22 15:49:28 +0100
  • 630800eed3 v1.3.4 v1.3.4 OlivierDehaene 2023-12-22 15:46:04 +0100
  • d158e75435 updating benchmark.rc Łukasz Olszewski 2023-12-22 15:45:50 +0100
  • b223ac70b6 Merge branch 'huggingface:main' into main Lukasz Olszewski 2023-12-22 14:38:26 +0100
  • 6e43e80b50 experimental new features Łukasz Olszewski 2023-12-22 14:36:13 +0100
  • e3dcd7f2c2 Disable tensor caching in HPU Graph execution (#4) jkaniecki 2023-12-22 13:51:16 +0100
  • d84b38e30d adding guidance and extra parameters for token bias Łukasz Olszewski 2023-12-22 11:14:06 +0100
  • 529d7c2591 Fix local load for peft (#1373) Nicolas Patry 2023-12-21 17:29:23 +0100
  • fad3a40102 Updating hub test. Nicolas Patry 2023-12-21 16:25:34 +0000
  • 83f81a6b89 Fix local load for peft Nicolas Patry 2023-12-21 15:35:15 +0000
  • 564199bab3 feat: update exllamav2 kernels (#1370) OlivierDehaene 2023-12-21 17:25:22 +0100
  • d3b5ae27b0 Fix santacoder. Nicolas Patry 2023-12-21 15:38:40 +0000
  • 9f42e5f6fd Preventing using exllama when act_order=True Nicolas Patry 2023-12-21 15:05:05 +0000
  • 238cc311f1 back to v2 by def OlivierDehaene 2023-12-21 15:46:07 +0100
  • 96a520ec78 remove v2 for now OlivierDehaene 2023-12-21 15:24:41 +0100
  • 672f290901 fmt OlivierDehaene 2023-12-21 11:35:06 +0100
  • 38df4c1d67 feat: update exllamav2 kernels OlivierDehaene 2023-12-21 11:32:55 +0100
  • 987c959f73 docs: Change URL for Habana Gaudi support in doc (#1343) regisss 2023-12-21 11:05:35 +0100
  • 1108560745 fixing requirements Łukasz Olszewski 2023-12-21 10:48:08 +0100
  • eb8923a97e Peft safetensors. (#1364) Nicolas Patry 2023-12-20 15:37:14 +0100
  • e749d0cc5a Adding CFG (context free grammar) to TGI Łukasz Olszewski 2023-12-20 12:42:56 +0100
  • d5db3433c8 Peft safetensors. Nicolas Patry 2023-12-20 08:20:25 +0000
  • c4c799137f Update ldcache to include libcuda.so during docker build Blair Johnson 2023-12-19 01:51:10 -0500
  • d077150eb7 fix: fix gpt-q with groupsize = -1 (#1358) OlivierDehaene 2023-12-18 16:07:05 +0100
  • ff3a79f272 fix: fix gpt-q with groupsize = -1 OlivierDehaene 2023-12-18 12:42:00 +0100
  • 8428ed1011 fix: fix offline (#1341) (#1347) OlivierDehaene 2023-12-18 10:20:08 +0100
  • 1b1bfa49b0 fix: fix logic if sliding window key is not present in config (#1352) OlivierDehaene 2023-12-15 14:56:17 +0100
  • 50bfdfc003 fix: fix logic if sliding window key is not present in config OlivierDehaene 2023-12-15 14:15:40 +0100
  • 9b56d3fbf5 feat: relax mistral requirements (#1351) OlivierDehaene 2023-12-15 12:52:24 +0100
  • d5b7e6e38f Reuse the same function to list local weights everywhere Raphael Glon 2023-12-15 12:48:05 +0100
  • 29f87920a8 Adapt unit tests to commit 28821bf Raphael Glon 2023-12-14 16:42:52 +0100
  • 492c95dcbf Text generation inference, fix offline Raphael Glon 2023-12-13 14:26:45 +0100
  • 5b6367f87c fix imports OlivierDehaene 2023-12-15 11:35:31 +0100
  • 68990a5635 feat: relax mistral requirements OlivierDehaene 2023-12-15 11:15:49 +0100
  • 16c6f2a893 Update README for proper usage of LIMIT_HPU_GRAPH Harish Subramony 2023-12-14 23:34:10 -0800
  • f3aea78fb6 v1.3.3 v1.3.3 OlivierDehaene 2023-12-15 01:20:42 +0100
  • 37555cf4e8 fix: max_past default value must be -1, not 0 (#1348) OlivierDehaene 2023-12-15 01:18:39 +0100
  • 7bce6032a8 stronger parameter validation OlivierDehaene 2023-12-15 00:38:27 +0100
  • f75bbbcc63 fix: max_past default value must be -1, not 0 OlivierDehaene 2023-12-15 00:10:58 +0100
  • 9b78a6eee3 fix: only keep stop sequence buffer if we have some OlivierDehaene 2023-12-14 17:04:58 +0100
  • 80a69204c1 fix: slice stopping criteria buffer OlivierDehaene 2023-12-14 17:01:43 +0100
  • 083c2de9f8 fix: fix quant linear autotune OlivierDehaene 2023-12-14 16:45:47 +0100
  • 773aabdda6 fix: fix triton OutOfResources import OlivierDehaene 2023-12-14 16:04:26 +0100
  • 50b495f3d8 feat: add more latency metrics in forward (#1346) OlivierDehaene 2023-12-14 15:59:38 +0100
  • 4b0bd2d7c3 fix tests validation OlivierDehaene 2023-12-14 15:38:20 +0100
  • 1e1408054b fix validation OlivierDehaene 2023-12-14 15:02:22 +0100
  • 6f9366556a fix tests OlivierDehaene 2023-12-14 14:46:44 +0100
  • 248eda7b20 fix decode timing OlivierDehaene 2023-12-14 12:41:10 +0100
  • 701dd7da67 feat: add more latency metrics in forward OlivierDehaene 2023-12-14 12:13:26 +0100
  • 44b267ab22 fix: fix gpt-q params loading OlivierDehaene 2023-12-14 11:02:16 +0100
  • 2aa262dbe5 Change URL for Habana Gaudi support in doc regisss 2023-12-13 19:05:28 +0100
  • b0b76ce711 Text generation inference, fix offline Raphael Glon 2023-12-13 14:26:45 +0100
  • 28821bfd5d fix: default max_new_tokens to 100 OlivierDehaene 2023-12-13 09:19:19 +0100
  • 88aae2595d v1.3.2 v1.3.2 OlivierDehaene 2023-12-12 18:10:22 +0100
  • 82670d9786 feat: add quant to mixtral (#1337) OlivierDehaene 2023-12-12 17:55:03 +0100
  • 378986e30e feat: add quant to mixtral OlivierDehaene 2023-12-12 17:15:49 +0100
  • 4353423102 Modify default for max_new_tokens in python client freitng 2023-12-12 15:24:01 +0100
  • ec6d4592d5 v1.3.1 v1.3.1 OlivierDehaene 2023-12-11 16:46:44 +0100
  • d0841cc8eb v1.3.0 v1.3.0 OlivierDehaene 2023-12-11 14:55:03 +0100
  • 72ee382ded chore: formatting OlivierDehaene 2023-12-11 14:49:52 +0100
  • 3a521c92b3 feat: mixtral (#1328) OlivierDehaene 2023-12-11 14:43:40 +0100
  • 6c2ac3b5fb support h100 OlivierDehaene 2023-12-11 13:40:50 +0100
  • 008733313c fix megablocks install OlivierDehaene 2023-12-11 13:34:51 +0100
  • d0aff8e5e2 copy correct conda env OlivierDehaene 2023-12-11 12:54:44 +0100
  • e55870b03e rebase OlivierDehaene 2023-12-11 12:53:33 +0100
  • 66238a1c94 update megablocks commit OlivierDehaene 2023-12-11 12:50:46 +0100
  • fdd8577bcc add git OlivierDehaene 2023-12-11 11:49:25 +0100
  • b5448af381 move install megablocks to its own command OlivierDehaene 2023-12-11 11:20:11 +0100
  • 5799e5cae9 transformers format OlivierDehaene 2023-12-11 11:14:40 +0100
  • e69eed8ea3 remove a tad of cpu bottleneck OlivierDehaene 2023-12-11 10:32:13 +0100
  • af1989459c wip OlivierDehaene 2023-12-10 10:38:23 +0100
  • 9ecfa16b12 Speculative (#1308) Nicolas Patry 2023-12-11 12:46:30 +0100
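
Several commits in this log (3ae9cd655d, f82ff3f64a, 65c913b55d, 9a79c2f867) build out an OpenAI-compatible chat completions API in the router. As a rough illustration only, here is a minimal Python sketch of exercising that endpoint; the /v1/chat/completions route, the OpenAI-style request/response fields, and the localhost:8080 address are assumptions based on the OpenAI schema these commits target, not details confirmed by the log itself.

```python
# Minimal sketch: call the OpenAI-compatible chat endpoint added in 3ae9cd655d.
# Route, port, and field names are assumed, not verified against this revision.
import requests

resp = requests.post(
    "http://localhost:8080/v1/chat/completions",  # assumed default TGI address
    json={
        "model": "tgi",  # placeholder id; f82ff3f64a adds model id to responses
        "messages": [{"role": "user", "content": "Say hello."}],
        "max_tokens": 32,
        "stream": False,  # streaming responses are covered by 9a79c2f867
    },
    timeout=60,
)
resp.raise_for_status()
body = resp.json()
print(body["choices"][0]["message"]["content"])
print(body["choices"][0]["finish_reason"])  # FinishReason support: 65c913b55d
```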