text-generation-inference

huggingface/text-generation-inference

Fork 0

mirror of https://github.com/huggingface/text-generation-inference.git synced 2025-09-11 04:14:52 +00:00

Commit Graph

Select branches

Hide Pull Requests

20250708-ci-fixes

add-chat-response-format

add-google-cloud-provider

add-quickstart-script

add-rotary-embed-tests

add-small-ttft-script

add-test-for-warmup-and-kvcache

add_L4

add_api_key

add_batch_dimension

add_chunked_atn

add_chunked_attn

add_deepseekv3

add_gptq_docs

add_integration_test

add_readme_dashboard

add_tunable_prefill

add_vlm_chunking

adding_docs

adjust-mllama-test-output

adjust-where-request-max-tokens-is-defaulted

aiter_kernels

amd-ci-fx

auto_length

automodel-supports-flash-paged-attention

avoid-cuda-graph-during-warmup-if-oom

avoid-zero-seed

backends/trtllm

backends/trtllm-executor

baichuan2-13b

bnb4

bugfix/add_tools_prompt

bugfix/moe-kernels-imports

bugfix/phi-exl2

bump-client-0.6.2

bump-kernel-versions

bump-poetry-and-requirements

chunked_attn_l4

ci-amihalik-update-chat-completion-messages

ci-new-cluster

ci-patch

ci-run-openai-function-calling-compatible-support

ci-update_xpu_image

ci-xpu

ci-xpu2

ci2

ci_amd

ci_amd2

ci_amd3

ci_amd4

close_dl_thread

compat_logger

cuda_ipc_allreduce

debug-gpt2

debug-request-id

debug-torch-23

debug/gemma2

debugging-timeouts

deploy/aml

dev

development-guide

dummy

enable-non-grammar-constrained-tools

enable-qwen2vl-video

enable-transformers-vlm

enable_non_divisible_embeddings

exl2

experiment/moe

explore-static-triton-kernels

explore-t4-gemma-issues

feat-backend-llamacpp

feat/add-load-test

feat/attention_sinks

feat/backend_abstraction

feat/backend_feature

feat/better_tokens

feat/cuda_12

feat/flash_decoding

feat/improve_max_tokens

feat/max_queue_size

feat/page_re_alloc

feat/parse_logs

feat/support_deepspeed

feature/machete

feature/moe-kernels

feature/no_repeat_ngram_size

feature/no_repeat_ngram_size_ci

feature/phi-3-small

feature/prefix

feature/radix-prefix-cache

feature/radix-prefix-cache-bench

feature/vlm-prefix-caching

fix-cudagraph-bug

fix-gemma-tokenization

fix-grammar-cleanup-bug

fix-grammar-fsm-batching

fix-mixtral-adapter-loading

fix-release-tests

fix-repack-for-marlin

fix-tool-call-def

fix-tp

fix-version-install

fix/allow-top-p-0

fix/avoid_record_streams

fix/op-trace-id

fix/parse-mamba-config

fix_default_arg

fix_exl2

fix_fp8_llama3.2

fix_leak

fix_mistral2

fix_neox_rotary_emb

fix_phi3

flashinfer

flashinfer-0.2.5

fp8_kvcache

fp8_rocm

gaudi_llama4_tmp

gha_sccache_use_secrets

git_2.0.4

git_3.1.1

git_3.2.0

git_3.2.1

git_3.3.0

git_3.3.1

git_3.3.2

git_v2.1.0

git_v2.1.1

git_v2.2.0

git_v2.3.0

git_v2.3.1

git_v2.4.0

git_v2.4.1

git_v3.0.0

git_v3.0.1

git_v3.0.2

git_v3.1.0

git_v3.2.2

git_v3.2.3

git_v3.3.3

git_v3.3.4

git_v3.3.5

improve-docs

improve-dynamic-message-content

improve-json-schema-field

improve-tool-call-and-response-ids

improve_defaults

improve_launcher_defaults

inlcude-latest-release-on-commit-builds-tags

ipex-moe

kvrouter

kvrouter-endpoints

llama-fused-compiled-mlp

main

maintenance/docker-network

maintenance/merge-vlm-input-prep

mamba2

martinigoyanes-fix-frequency-penalty

medusa

megatron

message-more-info

mi300-temp

mllama

model_compat_log

more_logs

multi-lora

new_minor_version

nix/cargo-clippy

nix/docker2

nix/pytorch-2.5.1

nix_integration_tests

nix_test2

no_root_user

no_root_user2

op-compilation-benchmarking

origin/slind_window_fix

osanseviero-patch-1

patch_version_3.3.6

pip-installable

pr-1869-ci-run

pr-2076-ci-run

pr-2290-ci-runner

pr-2366-ci-branch

pr-2444-ci-branch

pr-2517-ci-branch

pr-2711-ci-branch

pr-2784-ci-branch

pr-2840-ci-branch

pr-2954-ci-branch

pr-3002-ci-branch

pr-3004-ci-branch

pr-3018-ci-branch

precompile-kernels-workflow

prefix_chunk

prefix_default

proxy_sse_engine_state

quantization

quantization-0.1

refactor-lora-linear

release-3.2.4

remove_post_load_weights

response-header-metrics

revert

rocm-ci-build

rocm_6.2_fixes

router-grammar-compile

s3-cache

self-generating-docs

set-num-blocks

simpler_exllama

skip-mistral-test

speculative

streaming_conceptual

support-granite-vision

support-logit-bias-in-chat

support-phi-model

support-phi3-small

support-pre-compile-kernels

temp_work

test-batch-speedup-amount

test_docs

test_rocm

tmp_invariants

tmp_medusa

tmp_torch_compile

transformers-ci

triton_fix

trtllm-stop-words

trtllm/executor_stats

tuna

update-jsonschema

update_docs2

update_internal_version

update_peft

update_readme

upgrade-outlines

upgrade_mlp_speculator

use_g6

use_updated_kernels

vllm/setup

zstd

#1

#100

#101

#1010

#1018

#1019

#102

#1022

#1023

#1024

#103

#1033

#1034

#1042

#1044

#1045

#1048

#1049

#1052

#1054

#1058

#1059

#106

#1060

#1061

#1063

#1064

#1065

#1066

#1068

#107

#1070

#1071

#1075

#1076

#1077

#108

#1080

#1081

#1089

#109

#1090

#1091

#1092

#1094

#1096

#1097

#1099

#11

#110

#1100

#1101

#1102

#1103

#1105

#1110

#1112

#1116

#1123

#1128

#1134

#114

#1140

#1141

#115

#1153

#1155

#116

#1165

#1165

#117

#1173

#1176

#1178

#1179

#118

#1182

#1183

#1184

#1187

#119

#1198

#1202

#1211

#1214

#1219

#122

#1224

#1228

#123

#1239

#1241

#1242

#1243

#1246

#1252

#126

#1260

#1267

#1270

#1272

#1274

#1276

#1279

#128

#1285

#1287

#129

#1294

#1295

#13

#130

#1301

#1305

#1307

#1308

#1313

#132

#1326

#1328

#133

#1336

#1337

#134

#1341

#1343

#1346

#1347

#1348

#135

#1351

#1352

#1353

#1358

#136

#1361

#1364

#137

#1370

#1373

#138

#1381

#1386

#139

#1390

#1395

#14

#140

#1408

#141

#1414

#1419

#142

#1420

#1424

#1425

#1427

#1428

#143

#1436

#144

#1442

#1448

#145

#1450

#1453

#1454

#1455

#1459

#1461

#1462

#1463

#1469

#147

#1470

#1471

#1473

#1475

#1476

#1477

#1478

#148

#1480

#1484

#1486

#1488

#1489

#149

#1490

#1491

#1492

#1494

#1495

#1496

#1497

#1498

#15

#150

#1502

#1504

#1505

#1506

#151

#1511

#1512

#1514

#1515

#1516

#1517

#1518

#152

#1520

#1523

#1524

#1526

#1527

#153

#1532

#1533

#1537

#1539

#154

#1540

#1541

#1542

#1543

#155

#1550

#1552

#1555

#1556

#1557

#1560

#1563

#1564

#1567

#1568

#1569

#1570

#1571

#1576

#1577

#1578

#1579

#1580

#1583

#1584

#1585

#1586

#1587

#1588

#159

#1591

#1592

#1594

#16

#160

#1603

#1605

#1606

#1607

#1608

#1609

#161

#1610

#1614

#1617

#1618

#1619

#162

#1621

#1626

#1628

#163

#1632

#1637

#1638

#1639

#164

#1646

#1648

#1650

#1651

#1653

#1658

#1660

#1662

#1663

#1664

#1666

#1667

#1668

#167

#1676

#168

#1682

#1685

#1686

#1693

#1697

#1698

#17

#170

#1702

#1703

#1704

#1707

#1708

#1709

#1710

#1713

#1714

#1715

#1716

#1718

#1719

#1726

#1727

#1729

#173

#1730

#1731

#1734

#1735

#1736

#1737

#1739

#174

#1740

#1747

#1748

#1749

#175

#1751

#1755

#1756

#1758

#1759

#1760

#1764

#1765

#1767

#1768

#1773

#1774

#1775

#178

#1784

#1789

#179

#1790

#1791

#1797

#1798

#18

#180

#1800

#1801

#1808

#181

#1811

#1812

#1813

#1815

#1816

#1817

#1818

#1820

#1825

#1827

#1828

#1829

#183

#1830

#1832

#1833

#1835

#1836

#1839

#184

#1840

#1841

#1843

#1844

#1845

#1848

#1849

#185

#1850

#1851

#1854

#1855

#186

#1860

#1865

#1866

#1869

#187

#1882

#1884

#1886

#1888

#1889

#1890

#1892

#1894

#1895

#1898

#19

#190

#1902

#1906

#1908

#1909

#191

#1910

#1912

#1915

#1916

#1917

#1918

#1919

#1920

#1921

#1923

#1924

#1925

#1929

#193

#1931

#1932

#1934

#1935

#1936

#1937

#1938

#1939

#194

#1940

#1942

#1947

#1948

#1949

#1950

#1951

#1953

#1954

#1958

#1959

#196

#1963

#1965

#1967

#1970

#1971

#1975

#1980

#1981

#1985

#1986

#1988

#1989

#1990

#1994

#1995

#1996

#2

#20

#200

#2002

#2003

#2004

#2005

#2006

#2008

#201

#2010

#2011

#2013

#2014

#2015

#2016

#2017

#202

#2020

#2021

#2022

#2023

#2024

#2028

#203

#2031

#2032

#2033

#2034

#2038

#2039

#2044

#2045

#2046

#2047

#2049

#205

#2050

#2052

#2054

#2059

#2060

#2061

#2062

#2063

#2065

#2066

#2068

#207

#2071

#2072

#2074

#2075

#2076

#2078

#2079

#208

#2080

#2084

#2085

#2086

#2087

#2088

#2089

#2091

#2092

#2094

#2095

#2097

#2099

#210

#2101

#2102

#2103

#2104

#2105

#2109

#2110

#2111

#2114

#2115

#2116

#2117

#2118

#2119

#212

#2120

#2123

#2124

#2125

#2127

#2128

#2129

#213

#2131

#2132

#2133

#2134

#2135

#2137

#2138

#214

#2140

#2141

#2142

#2148

#2149

#215

#2151

#2152

#2153

#2155

#2156

#2158

#216

#2161

#2163

#2164

#2165

#2166

#2167

#2168

#2169

#217

#2170

#2173

#2175

#2176

#2178

#2179

#218

#2180

#2181

#2182

#2186

#2187

#2189

#219

#2190

#2191

#2193

#2194

#2196

#22

#220

#2201

#2202

#2203

#2204

#2205

#2208

#2209

#221

#2212

#2213

#2215

#2216

#2217

#222

#2220

#2221

#2224

#2225

#2226

#2228

#2230

#2231

#2232

#2233

#2237

#2242

#2243

#2244

#2245

#2248

#2249

#2250

#2251

#2254

#2255

#2256

#2257

#2258

#2259

#226

#2260

#2261

#2262

#2266

#2268

#2269

#227

#2271

#2272

#2273

#2274

#2276

#2277

#2278

#2279

#228

#2281

#2282

#2283

#2284

#2285

#2286

#2287

#2288

#2289

#2290

#2291

#2292

#2298

#2299

#23

#2300

#2303

#2304

#2306

#2307

#2308

#2309

#2311

#2311

#2313

#2314

#2315

#2317

#2320

#2323

#2325

#2326

#2327

#2328

#2329

#233

#2330

#2331

#2333

#2335

#2336

#2337

#2338

#2339

#234

#2341

#2342

#2343

#2344

#2345

#2346

#2347

#235

#2350

#2351

#2352

#2353

#2354

#2357

#2358

#2359

#2360

#2361

#2364

#2365

#2366

#2367

#2368

#237

#2370

#2371

#2372

#2374

#2377

#2378

#2379

#2381

#2382

#2384

#2385

#2386

#2387

#2389

#2390

#2391

#2392

#2394

#2395

#2396

#2397

#2398

#2399

#24

#2401

#2402

#2403

#2404

#2405

#2406

#2407

#2408

#2410

#2411

#2412

#2414

#2415

#2416

#2419

#242

#2420

#2422

#2423

#2424

#2426

#2427

#2428

#2429

#2430

#2431

#2433

#2437

#2438

#2439

#244

#2442

#2443

#2444

#2449

#2450

#2451

#2453

#2454

#2455

#2459

#246

#2462

#2463

#2468

#2469

#2470

#2471

#2472

#2473

#2477

#2478

#2479

#248

#2481

#2482

#2484

#2486

#2489

#2490

#2491

#2492

#2493

#2494

#2496

#2497

#2498

#2499

#25

#250

#2500

#2501

#2507

#251

#2510

#2511

#2512

#2513

#2514

#2515

#2516

#2517

#2518

#2519

#252

#2520

#2521

#2524

#2525

#2527

#2528

#2529

#2532

#2533

#2535

#2536

#2537

#2538

#2539

#2540

#2545

#2546

#2547

#2548

#255

#2550

#2551

#2552

#2553

#2554

#2555

#2556

#2557

#2558

#2561

#2562

#2563

#2566

#2567

#2568

#257

#2574

#2575

#2577

#2578

#2579

#258

#2580

#2582

#2585

#2586

#2587

#2588

#2589

#259

#2590

#2591

#2592

#2594

#2595

#2596

#2597

#2599

#26

#2600

#2601

#2602

#2603

#2604

#2605

#2606

#2607

#2609

#261

#2610

#2611

#2612

#2614

#2616

#2617

#2619

#262

#2620

#2622

#2623

#2625

#2627

#2628

#2629

#2630

#2631

#2632

#2633

#2634

#2637

#264

#2640

#2642

#2642

#2645

#2646

#2647

#2648

#2650

#2651

#2652

#2655

#2658

#2659

#266

#2661

#2663

#2664

#2665

#2666

#2668

#267

#2673

#2674

#2677

#2678

#2680

#2682

#2683

#2684

#2685

#2686

#2687

#2688

#2689

#269

#2690

#2691

#2692

#2693

#2694

#2695

#2697

#2698

#2699

#27

#2701

#2702

#2704

#2706

#2707

#2708

#2709

#2710

#2711

#2712

#2713

#2714

#2716

#2717

#2718

#2719

#272

#2720

#2721

#2723

#2724

#2725

#2726

#2732

#2733

#2738

#274

#2740

#2741

#2742

#2743

#2745

#2746

#275

#2750

#2751

#2753

#2754

#2755

#2756

#2756

#2758

#276

#2760

#2761

#2762

#2764

#2765

#2766

#2767

#2768

#2769

#277

#2770

#2771

#2772

#2773

#2774

#2777

#2778

#2779

#278

#2782

#2784

#2785

#2786

#2788

#2789

#2790

#2791

#2792

#2793

#2795

#2796

#2797

#2798

#2799

#28

#2802

#2803

#2806

#2807

#2808

#2809

#2810

#2811

#2812

#2814

#2815

#2816

#2817

#2818

#282

#2824

#2825

#2826

#2827

#2829

#2831

#2833

#2834

#2837

#284

#2840

#2841

#2845

#2846

#2848

#2848

#2849

#285

#2850

#2852

#2855

#2856

#2858

#2859

#286

#2861

#2862

#2863

#2866

#2866

#287

#2870

#2874

#2874

#2878

#2882

#2883

#2884

#2885

#2886

#2891

#2892

#2893

#2898

#29

#2901

#2903

#2904

#2905

#2907

#2908

#2910

#2911

#2912

#2913

#2916

#2917

#2918

#2919

#292

#2921

#2922

#2924

#2928

#2929

#2931

#2932

#2933

#2935

#2936

#2937

#2938

#294

#2940

#2941

#2942

#2943

#2945

#2947

#2948

#2949

#2950

#2951

#2953

#2954

#2957

#2958

#2960

#2961

#2962

#2963

#2964

#2964

#2965

#2965

#2966

#2967

#2968

#297

#2970

#2971

#2972

#2975

#2976

#2976

#2977

#2979

#298

#2980

#2981

#2982

#2983

#2988

#299

#2990

#2991

#2992

#2993

#2995

#2996

#2998

#2999

#30

#3000

#3001

#3002

#3003

#3004

#3006

#3007

#3008

#3010

#3012

#3012

#3013

#3015

#3016

#3017

#3018

#302

#3021

#3022

#3024

#3027

#3028

#3029

#303

#3030

#3031

#3032

#3033

#3034

#3034

#3035

#3036

#3037

#3039

#304

#3040

#3041

#3042

#3044

#3045

#3046

#3047

#3048

#3049

#305

#3051

#3053

#3054

#3055

#3056

#3057

#3060

#3061

#3063

#3063

#3064

#3064

#3065

#3065

#3067

#3068

#3069

#3072

#3073

#3074

#3075

#3076

#3077

#3078

#3079

#3079

#308

#3080

#3081

#3083

#3084

#3085

#3086

#3089

#3091

#3092

#3093

#3094

#3095

#3096

#3098

#3099

#31

#310

#3100

#3101

#3103

#3106

#3107

#3108

#3109

#3111

#3111

#3112

#3113

#3116

#3117

#3118

#3120

#3121

#3122

#3124

#3125

#3126

#3127

#3128

#3129

#313

#3131

#3132

#3134

#3136

#3141

#3142

#3144

#3145

#3147

#3148

#3150

#3151

#3152

#3154

#3154

#3156

#3157

#3160

#3161

#3162

#3162

#3164

#3164

#3166

#3166

#3167

#317

#3170

#3171

#3172

#3174

#3176

#3179

#318

#3182

#3184

#3186

#3186

#3187

#3188

#3189

#3193

#3194

#3194

#3196

#3197

#3197

#32

#3200

#3201

#3201

#3202

#3204

#3206

#3207

#3210

#3210

#3211

#3217

#3218

#3220

#3221

#3222

#3223

#3224

#3229

#3230

#3231

#3231

#3232

#3234

#3235

#3236

#3237

#3238

#3239

#3240

#3242

#3244

#3245

#3246

#3249

#325

#3252

#3253

#3254

#3255

#3255

#3256

#3258

#3260

#3261

#3262

#3263

#3264

#3265

#3266

#3266

#3267

#3268

#3269

#327

#3270

#3271

#3273

#3274

#3276

#3276

#328

#3280

#3281

#3282

#3283

#3284

#3286

#3287

#3288

#3288

#329

#3291

#3291

#3292

#3292

#3297

#3298

#33

#3300

#3300

#3302

#3302

#3308

#3310

#3310

#3311

#3312

#3313

#3314

#3315

#3315

#3319

#3322

#3323

#3324

#3325

#3326

#3327

#334

#335

#336

#34

#340

#341

#343

#344

#348

#35

#351

#352

#353

#356

#357

#358

#359

#36

#360

#362

#363

#364

#367

#368

#37

#370

#373

#379

#384

#385

#388

#39

#393

#394

#395

#396

#4

#40

#400

#404

#406

#407

#41

#411

#412

#42

#434

#438

#44

#441

#443

#45

#453

#46

#462

#465

#47

#470

#472

#475

#477

#48

#480

#483

#485

#488

#49

#498

#5

#50

#501

#502

#51

#513

#514

#516

#519

#52

#520

#521

#522

#525

#529

#53

#534

#54

#543

#544

#545

#55

#550

#553

#557

#558

#56

#561

#562

#567

#57

#575

#578

#579

#58

#580

#581

#582

#583

#585

#586

#587

#588

#59

#590

#595

#596

#6

#60

#600

#605

#608

#609

#61

#611

#616

#617

#618

#619

#62

#621

#623

#624

#626

#63

#630

#633

#634

#635

#639

#64

#642

#643

#647

#648

#659

#66

#661

#664

#665

#666

#67

#670

#671

#678

#68

#684

#689

#698

#7

#70

#704

#708

#71

#712

#713

#715

#719

#72

#721

#723

#725

#727

#73

#733

#737

#738

#740

#741

#743

#745

#746

#748

#75

#750

#76

#761

#762

#767

#768

#770

#773

#783

#785

#789

#791

#793

#794

#795

#797

#798

#799

#8

#803

#805

#806

#809

#810

#812

#82

#820

#821

#822

#823

#829

#831

#836

#838

#84

#842

#848

#85

#851

#852

#853

#854

#858

#86

#860

#862

#867

#868

#87

#872

#88

#881

#884

#886

#889

#89

#892

#893

#898

#9

#90

#900

#901

#905

#906

#91

#910

#911

#918

#921

#93

#930

#932

#935

#94

#941

#947

#95

#950

#951

#953

#954

#957

#958

#96

#963

#964

#966

#968

#97

#971

#977

#981

#986

#989

#990

#993

#994

#999

v0.2.0

v0.2.1

v0.3.0

v0.3.1

v0.3.2

v0.4.0

v0.4.1

v0.4.2

v0.4.3

v0.5.0

v0.6.0

v0.7.0

v0.8.0

v0.8.1

v0.8.2

v0.9.0

v0.9.1

v0.9.2

v0.9.3

v0.9.4

v1.0.0

v1.0.1

v1.0.2

v1.0.3

v1.1.0

v1.1.1

v1.2.0

v1.3.0

v1.3.1

v1.3.2

v1.3.3

v1.3.4

v1.4.0

v1.4.1

v1.4.2

v1.4.3

v1.4.4

v1.4.5

v2.0.0

v2.0.1

v2.0.2

v2.0.3

v2.0.4

v2.1.0

v2.1.1

v2.2.0

v2.3.0

v2.3.1

v2.4.0

v2.4.1

v3.0.0

v3.0.1

v3.0.2

v3.1.0

v3.1.1

v3.2.0

v3.2.1

v3.2.2

v3.2.3

v3.3.0

v3.3.1

v3.3.2

v3.3.3

v3.3.4

v3.3.5

9b6db5f793

Support tools (#1587) drbh 2024-02-28 05:10:27 -0500
c84223590b add medusa OlivierDehaene 2024-02-28 11:02:39 +0100
a56bd736e6 feat: add starcoder2 OlivierDehaene 2024-02-26 17:47:12 +0100
2122acc60f

Add warmup for all possible shapes for prefill #49 (#81) Karol Damaszke 2024-02-28 10:40:13 +0100
31bed905d4

Update habana profiler (#50) (#80) Karol Damaszke 2024-02-28 09:57:40 +0100
d31fb62576

Add more info to high-level profiler events (#46) (#79) Karol Damaszke 2024-02-28 09:55:50 +0100
4bf58907d0 fix: adjust typos in docs drbh 2024-02-28 04:03:52 +0000
b5cacca1dc fix: update tests for streaming tools drbh 2024-02-28 03:56:37 +0000
0fc7237380 feat: support streaming and improve docs drbh 2024-02-28 02:32:02 +0000
7c04b6d664 fix: add guidance to toc drbh 2024-02-27 17:56:54 +0000
4a81dd042f feat: improve tool serialization drbh 2024-02-27 17:52:46 +0000
f72155ae46 feat: add docs and address syntax tweaks drbh 2024-02-27 16:54:29 +0000
960cc95a0e

Update speculation.md adding_docs Nicolas Patry 2024-02-27 15:55:37 +0100
b6922d48de Add the speculation docs. Nicolas Patry 2024-02-27 15:49:58 +0100
941d36f3fd

Enable deferred token generation (#44) (#75) Karol Damaszke 2024-02-27 15:46:40 +0100
cea291718e Adding some docs. Nicolas Patry 2024-02-27 15:38:02 +0100
6248c5610e

Revert "Prefer prefill instead of decode when max_waiting_tokens==0 (#18)" (#45) (#76) Karol Damaszke 2024-02-27 11:56:45 +0100
a42dc2027b update commit feat/flash_decoding OlivierDehaene 2024-02-27 11:24:07 +0100
ef99678798 wip not faster OlivierDehaene 2024-01-25 15:26:51 +0100
bf700e7eef

Revamp medusa implementation so that every model can benefit. (#1588) Nicolas Patry 2024-02-26 19:49:28 +0100
5f8526235a feat: deprecate suffix and completion template drbh 2024-02-26 18:22:28 +0000
e69e68c8ea Small fixes in the weights loading logic. Nicolas Patry 2024-02-26 17:32:42 +0000
915e5f088c Forgot docker launcher. Nicolas Patry 2024-02-26 17:07:54 +0000
bfec09ecc2 Fixing revision for the medusa test. Nicolas Patry 2024-02-26 16:31:40 +0000
83b059bd27

Bulk shifting (#40) (#70) jkaniecki 2024-02-26 17:29:56 +0100
e672f976fb Fix . Nicolas Patry 2024-02-26 16:31:01 +0100
de421dc53e feat: remove debug cuda avoid drbh 2024-02-26 15:19:07 +0000
fa40801fb6 Specify revision to force use safetensors files. Nicolas Patry 2024-02-26 15:24:48 +0100
7a37655d8e feat: improve client for tools and fix default choice drbh 2024-02-26 14:18:09 +0000
1445b9517d Remove dead file. Nicolas Patry 2024-02-26 15:15:02 +0100
c7793235d0 Download safetensors directly. Nicolas Patry 2024-02-26 11:25:12 +0000
680a52f2f2 Fix GPT2 detection. Nicolas Patry 2024-02-26 11:20:39 +0000
af7ebc2639 fix: avoid long runnning test drbh 2024-02-26 04:29:35 +0000
eb762a9087 fix: avoid seed change drbh 2024-02-25 14:00:51 +0000
7ec33206e6 fix: update grammar tests drbh 2024-02-25 13:59:53 +0000
8f4aba6ad3

Update dependencies (#69) regisss 2024-02-25 13:07:47 +0100
ba39951df2

Merge branch 'main' into qwen2 Cheng Kuan Yong Jason 2024-02-24 15:48:57 +0800
a29893486e Added test cases Jason Cheng 2024-02-24 15:42:56 +0800
a32d3dd6cb feat: improve tools api and add tool prompt drbh 2024-02-24 01:58:54 +0000
ed95f1982d Fix gemma + medusa. Nicolas Patry 2024-02-23 21:13:34 +0000
a0095b5b8d Fixing. Nicolas Patry 2024-02-23 15:10:08 +0000
cd57f9c632 fix: avoid duplicate bos token fix-gemma-tokenization drbh 2024-02-23 14:53:18 +0000
bcd5c8e599 fix: update names and snaps drbh 2024-02-23 14:31:33 +0000
c3bd8ef445

Add Fp8 support (#42) (#71) jkaniecki 2024-02-23 11:52:28 +0100
a490847702

Sequence bucketing for prefill (#39) (#67) jkaniecki 2024-02-23 01:52:14 +0100
b40725e698

Merge branch 'huggingface:main' into main dstnluong-google 2024-02-22 14:45:06 -0800
0863dee463 import logger dstnluong-google 2024-02-22 22:11:42 +0000
c02a42db93 import os dstnluong-google 2024-02-22 22:07:10 +0000
39fae920d8 typo dstnluong-google 2024-02-22 22:00:15 +0000
6690daec09 feat: update tests drbh 2024-02-22 20:05:58 +0000
d2635dd01b fix: prefer seed 1 in all cases drbh 2024-02-22 18:51:02 +0000
0e30e65822 feat: respect tool choice drbh 2024-02-22 18:26:49 +0000
3ec57acac1 fix: update tests and snaps drbh 2024-02-22 17:34:02 +0000
f592df5234 Fix MPT, not sure about idefics. Nicolas Patry 2024-02-22 16:08:15 +0000
c7caac47f8 fix: update snapshot drbh 2024-02-22 14:11:14 +0000
e04c8981d1 fix: trim trailing spaces drbh 2024-02-22 13:32:45 +0000
014d3fd4ef feat: add concrete tool types drbh 2024-02-22 04:19:47 +0000
1aa2126206 fix: add chat docs to client drbh 2024-02-21 18:25:01 -0500
c8f2081171 feat: minimal tool support and chat client drbh 2024-02-16 17:18:21 +0000
0f500f6d14 feat: basic tool support via grammar composition drbh 2024-02-16 16:00:59 +0000
8eb88a7d75

Bump rust version (#41) (#68) jkaniecki 2024-02-22 16:08:34 +0100
ac5a1c6f51

fix: avoid default message (#1579) drbh 2024-02-22 08:56:42 -0500
64d38afa9f Black. Nicolas Patry 2024-02-22 13:01:43 +0000
9ad6086250

Improve habana profile dev experience (#36) (#65) jkaniecki 2024-02-22 13:57:45 +0100
7a9998d47c Remove the old logic. Nicolas Patry 2024-02-22 12:32:46 +0000
21b3072288 Small updates. Nicolas Patry 2024-02-22 12:06:36 +0000
ac419f5e46 Upgrade ALL the code. Nicolas Patry 2024-02-22 11:37:05 +0000
f7ef414e38

Remove unused pad_token_id for filter (#35) (#64) jkaniecki 2024-02-22 11:24:09 +0100
8f590759e3

Prefill optimization by allocating space only for the first output token (#34) (#62) jkaniecki 2024-02-22 04:55:43 +0100
03fb94b853 gs:// model_id is already set to /tmp/gcs_model/ dstnluong-google 2024-02-21 22:03:55 +0000
666b75ea87 Move GCS install to requirements files. dstnluong-google 2024-02-21 21:58:46 +0000
ee9b5a2be6 nit: Rename to Gemma dstnluong-google 2024-02-21 21:54:31 +0000
74e09e6594

Merge branch 'huggingface:main' into main dstnluong-google 2024-02-21 13:53:21 -0800
2446f3ec32 [Tmp] Revamping medusa to make it orthogonal. Nicolas Patry 2024-02-21 21:37:27 +0000
c64866e05a

exclude ubuntu.com domain Guillaume LEGENDRE 2024-02-21 19:45:45 +0100
e61f124f63

fix Guillaume LEGENDRE 2024-02-21 19:33:37 +0100
710b760602

fix typo Guillaume LEGENDRE 2024-02-21 19:28:48 +0100
3a85f1bd54

try fixing buildx proxy Guillaume LEGENDRE 2024-02-21 19:27:28 +0100
66f89120b5 fix: add back typo removed variable drbh 2024-02-21 11:55:06 -0500
3e22cdd14c fix: pre commit trailing whitespace typo drbh 2024-02-21 11:41:06 -0500
1724d06f9d fix: adjust typo drbh 2024-02-21 11:32:14 -0500
544f848bde fix: improve completion request params and comments drbh 2024-02-21 11:31:11 -0500
19c0248985 feat: update docs refactor and avoid cuda graphs when unavailable drbh 2024-02-21 11:17:41 -0500
90d6330819 fix: add missing imports drbh 2024-02-21 10:52:46 -0500
07ac99a93f

Merge branch 'main' into support-legacy-completions-api drbh 2024-02-21 10:48:28 -0500
d0d0fd24a8

update tailscale action version Guillaume LEGENDRE 2024-02-21 15:43:58 +0100
92ab9d2ee6

change runner and remove tailscale userspace for amd Guillaume LEGENDRE 2024-02-21 15:41:05 +0100
80303b469c

Do not limit hpu graphs by default (#32) (#61) jkaniecki 2024-02-21 15:38:00 +0100
383478758b

fix tailscale Guillaume LEGENDRE 2024-02-21 15:36:48 +0100
010508cec8

fix: fix openapi schema (#1586) OlivierDehaene 2024-02-21 15:30:45 +0100
85b224b108 update OlivierDehaene 2024-02-21 15:30:28 +0100
4fe79da8f3 fix: fix openapi schema OlivierDehaene 2024-02-21 15:28:46 +0100
9c1cb81cd8

v1.4.2 (#1585) v1.4.2 OlivierDehaene 2024-02-21 14:50:57 +0100
c1bdca91c2 default compat_return_full_text to true OlivierDehaene 2024-02-21 14:49:25 +0100
605e0369c4 Set qkv attention layer bias to True Jason Cheng 2024-02-21 21:38:43 +0800
6b6dec9ea1

Transparent tokenizer uses explicit int32 (#31) (#60) jkaniecki 2024-02-21 14:24:41 +0100
08827bef2e v1.4.2 OlivierDehaene 2024-02-21 14:17:03 +0100
c86f58d37c

feat: add support for Gemma (#1583) OlivierDehaene 2024-02-21 14:15:22 +0100
bb57cb34e0 Added Qwen2 but generation is wrong Jason Cheng 2024-02-21 18:30:57 +0800
ff3e82c880 skip gemma integration tests for now OlivierDehaene 2024-02-21 14:14:00 +0100