Commit Graph

  • 8a4df6e181
    Only n_heads / process_group.size() are necessary. Nicolas Patry 2024-08-28 16:34:58 +0200
  • 8d01848370
    Update server tests Nicolas Patry 2024-08-28 15:42:05 +0200
  • 12325564dc
    Put back default pure shell. Nicolas Patry 2024-08-28 14:54:05 +0200
  • f886747949
    Oops this doesn't belong here. Nicolas Patry 2024-08-28 14:49:00 +0200
  • e6ee67f301
    Truncating left for radix purposes. Nicolas Patry 2024-08-28 10:53:22 +0200
  • 0a60973166
    Fixing the batching tokenization in flash causal lm. Nicolas Patry 2024-08-28 10:34:10 +0200
  • c6f1a61267
    Update the chat test. Nicolas Patry 2024-08-27 23:02:12 +0200
  • 8ac1ffa087
    Removing encoder_decoder (seq2seq). Nicolas Patry 2024-08-27 21:11:49 +0200
  • ccaf1d0030
    Fixing the test. Nicolas Patry 2024-08-27 20:03:50 +0200
  • 2cf1f5c00e
    Fixing the issue with add_special_tokens not being passed around. Nicolas Patry 2024-08-27 20:02:35 +0200
  • e0069a3a26
    Fixing seqlen with the new vlms. Nicolas Patry 2024-08-27 18:16:35 +0200
  • 9dacac3b15
    add_special_tokens is internal only Nicolas Patry 2024-08-27 15:18:47 +0200
  • 55d984d730
    Fixed flashinfer version. Nicolas Patry 2024-08-27 15:00:22 +0200
  • bb9769ed42
    Update all models. Nicolas Patry 2024-08-27 14:46:42 +0200
  • 65b94a69bd
    Fixing prefix caching for flashdecoding. Nicolas Patry 2024-08-27 14:23:51 +0200
  • 7f1816a4e1
    Change add_special_tokens in order to have the correct tokens for chat input and not (since it's super important with the prefixing now) Nicolas Patry 2024-08-27 11:51:29 +0200
  • f1c0735453
    Don't enable prefix caching on VLM just yet. Nicolas Patry 2024-08-27 09:58:19 +0200
  • e30fb25444
    Fixing the default for vlm. Nicolas Patry 2024-08-26 22:45:04 +0200
  • 27b566baa8
    Downgrade some logs. Nicolas Patry 2024-08-26 18:30:19 +0200
  • 26e5037de4
    This seems to be working. Nicolas Patry 2024-08-26 18:27:28 +0200
  • f5182c188c
    Is this enough to make it work ? Nicolas Patry 2024-08-26 17:43:27 +0200
  • 1568e82548
    Override the env in server tests. Nicolas Patry 2024-08-26 15:25:03 +0200
  • 682db34b6a
    Handling debugger. Nicolas Patry 2024-08-26 14:59:27 +0200
  • c53968dc45
    Remove lambda for cleaner function. Nicolas Patry 2024-08-23 15:37:54 +0200
  • 32f6416358
    Upgrade resolution system for less errors in resolution. Nicolas Patry 2024-08-23 15:27:53 +0200
  • 5eb6ea0063
    Tmp Nicolas Patry 2024-08-22 14:34:12 +0200
  • 0bf4eb9683
    Updated flake lock Nicolas Patry 2024-08-21 09:15:10 +0200
  • b80593bfa3
    Apply suggestions from code review Nicolas Patry 2024-08-21 09:03:28 +0200
  • 8d0220a695
    Forgot last default place. Nicolas Patry 2024-08-20 18:17:54 +0200
  • 860b550cdf
    Everywhere 1.80 Nicolas Patry 2024-08-20 15:52:31 +0200
  • 344fee0d44
    Upgrade to 1.80 because of bitstream... Nicolas Patry 2024-08-20 15:43:42 +0200
  • 17c8a5e574
    Update cargo lock ? Nicolas Patry 2024-08-20 15:28:11 +0200
  • ba1ce20ce8
    Updating integration tests with new values with FI/FD. Nicolas Patry 2024-08-20 15:12:41 +0200
  • ffb6841121
    Update lock Nicolas Patry 2024-08-20 12:08:33 +0200
  • f0b35f94b8
    More specific codes. Nicolas Patry 2024-08-20 12:05:40 +0200
  • a6cd5fef23
    Disable prefix caching for lora. Nicolas Patry 2024-08-20 09:14:57 +0200
  • cba59aca03
    Disabling flashinfer/prefix caching on odd head_dim Nicolas Patry 2024-08-19 16:56:06 +0200
  • f55278de2d
    Allowing window_left_size (dummy version). Nicolas Patry 2024-08-17 12:04:21 +0200
  • f2bdc65098
    Using prebuilt. Nicolas Patry 2024-08-17 00:42:51 +0200
  • 9d4c5d39fe
    Include flashinfer in the docker. Nicolas Patry 2024-08-16 23:50:37 +0200
  • 60719babf6
    Making prefix/flashinfer the default and testing the full release tests. Nicolas Patry 2024-08-16 14:16:45 +0200
  • 21187c27c9
    fix: bump minijinja version and add test for llama 3.1 tools (#2463) drbh 2024-08-27 13:31:08 -0400
  • 5e14f5bed7
    fix: add to redocly ignore and lint drbh 2024-08-27 17:01:15 +0000
  • 8bfa11f636
    fix: update docs with new endpoint drbh 2024-08-27 16:59:33 +0000
  • a76bd78486
    fix: revert route typo drbh 2024-08-27 16:34:37 +0000
  • 997d7a102a
    fix: remove unused type import drbh 2024-08-27 16:33:40 +0000
  • b348ab4c55
    Merge branch 'support-openai-models-endpoint' of github.com:huggingface/text-generation-inference into support-openai-models-endpoint drbh 2024-08-27 16:31:50 +0000
  • 1b8f384ce2
    fix: adjust comment typo drbh 2024-08-27 16:26:50 +0000
  • 25a0ea6674
    fix: prefer minijinja native methods and prefer workspace level dependency drbh 2024-08-27 16:25:23 +0000
  • 8fd5639e9f
    fix: support tojson and avoid message indexing issue in template drbh 2024-08-27 15:05:43 +0000
  • 2788d41a76
    Fixing CI. (#2462) Nicolas Patry 2024-08-27 15:33:02 +0200
  • f1a94fb009
    Fixing CI. Nicolas Patry 2024-08-27 15:24:11 +0200
  • fde061ccf8
    Updated docker image version to 2.0.4 (#212) Thanaji Rao Thakkalapelli 2024-08-27 01:14:27 -0700
  • 8398d4f436
    feat: add /v1/models endpoint drbh 2024-08-19 16:00:48 +0000
  • cfa73b5c99
    Pr 2451 ci branch (#2454) drbh 2024-08-26 20:19:38 -0400
  • 57a8038d05
    fix: increase test client timeout for grammar compilation tests drbh 2024-08-26 21:14:32 +0000
  • 20db2c3db8
    feat: avoid skip tool test and avoid empty tool prompts drbh 2024-08-26 19:15:05 +0000
  • 1f72dcf062
    fix: simplify tool grammar logic and improve schema drbh 2024-08-26 17:59:21 +0000
  • 8b45d82897
    fix: adjust non tool template apply drbh 2024-08-25 19:12:59 +0000
  • 1bf0e3b65c
    feat: refactor tool logic to include notify_error in prompt and adjust typing drbh 2024-08-23 21:07:43 +0000
  • 9ea34977ac
    feat: improve default tool serialization and lints drbh 2024-08-23 18:05:40 +0000
  • 2ee98c7c07
    fix[router]: Fix tools not passed in chat template Simone Rossi 2024-08-22 15:48:37 +0000
  • 30be188400
    Fix: don't apply post layernorm in SiglipVisionTransformer (#2459) drbh 2024-08-26 17:04:46 -0400
  • 6256b81baf
    fix: adjust pali gemma for post layer norm and small refactors drbh 2024-08-26 19:35:39 +0000
  • 2985503900
    llava-next Fp8 (#209) yuanwu2017 2024-08-26 22:53:08 +0800
  • 55d60a103c
    Add qwen2 fp8 support (#210) Wang, Chang 2024-08-26 17:02:58 +0800
  • e33db1877c
    Updated Readme to use flash attention for llama (#200) Thanaji Rao Thakkalapelli 2024-08-26 02:01:11 -0700
  • c925bd2872
    Undo disable of hpu graphs for starcoder (#201) Vidya Galli 2024-08-26 01:58:01 -0700
  • 0c3239e710
    Enable quantization with INC (#203) Thanaji Rao Thakkalapelli 2024-08-26 01:55:37 -0700
  • ea48ae169a
    Make prefill time of static benchmark correct (#214) Sun Choi 2024-08-26 01:51:28 -0700
  • a8cead1f92
    Upgrade SynapseAI version to 1.17.0 (#208) yuanwu2017 2024-08-26 16:49:29 +0800
  • b84303e2e9
    Fix: don't apply post layernorm in SiglipVisionTransformer Travis Addair 2024-08-24 23:41:23 -0700
  • f3c5d7d92f
    nix: add default package (#2453) Daniël de Kok 2024-08-23 22:06:22 +0200
  • dd89e0d24c
    nix: add default package Daniël de Kok 2024-08-23 06:22:09 +0000
  • e152cb022b
    fix: also show total memory after full warmup avoid-cuda-graph-during-warmup-if-oom drbh 2024-08-22 17:57:51 +0000
  • 8b4cd2a9fc
    fix: skip cuda graphs that will oom and improve free memory logging drbh 2024-08-22 17:49:17 +0000
  • 9a3e838079
    fix[router]: Fix tools not passed in chat template Simone Rossi 2024-08-22 15:48:37 +0000
  • 0b02d45a05
    add gptq and awq int4 support in intel platform Wang, Yi A 2024-08-21 22:47:34 -0700
  • 0b3384762b
    Update Dockerfile_intel Tyler Titsworth 2024-08-21 15:39:50 -0700
  • 358ceb67dd
    nix: add awq-inference-engine as server dependency (#2442) Daniël de Kok 2024-08-21 22:20:03 +0200
  • c98dbdb8c9
    nix: add awq-inference-engine as server dependency Daniël de Kok 2024-08-21 20:09:39 +0000
  • d33fb9ed2c
    extracting traceparent from header to span fix/op-trace-id erikkaum 2024-08-21 11:28:50 +0200
  • 2652e209e7
    Updated flake lock prefix_default Nicolas Patry 2024-08-21 09:15:10 +0200
  • 3ece76392b
    Apply suggestions from code review Nicolas Patry 2024-08-21 09:03:28 +0200
  • cdbf73eef8
    Forgot last default place. Nicolas Patry 2024-08-20 18:17:54 +0200
  • 3d46783f1a
    Everywhere 1.80 Nicolas Patry 2024-08-20 15:52:31 +0200
  • e2319fa891
    Upgrade to 1.80 because of bitstream... Nicolas Patry 2024-08-20 15:43:42 +0200
  • f628886c0a
    Update cargo lock ? Nicolas Patry 2024-08-20 15:28:11 +0200
  • 2fe5879816
    Updating integration tests with new values with FI/FD. Nicolas Patry 2024-08-20 15:12:41 +0200
  • e48e07c04b
    Update lock Nicolas Patry 2024-08-20 12:08:33 +0200
  • bd0ced354d
    More specific codes. Nicolas Patry 2024-08-20 12:05:40 +0200
  • f5ee062cbd
    Disable prefix caching for lora. Nicolas Patry 2024-08-20 09:14:57 +0200
  • 719d7b4d54
    Disabling flashinfer/prefix caching on odd head_dim Nicolas Patry 2024-08-19 16:56:06 +0200
  • 7857910435
    Allowing window_left_size (dummy version). Nicolas Patry 2024-08-17 12:04:21 +0200
  • 73fd04d60a
    Using prebuilt. Nicolas Patry 2024-08-17 00:42:51 +0200
  • 5336755358
    Include flashinfer in the docker. Nicolas Patry 2024-08-16 23:50:37 +0200
  • 52c813527a
    Making prefix/flashinfer the default and testing the full release tests. Nicolas Patry 2024-08-16 14:16:45 +0200
  • 310778e02a
    Adding eetq to flake. (#2438) Nicolas Patry 2024-08-21 09:06:33 +0200
  • cbbfe8eb2a
    Adding eetq to flake. Nicolas Patry 2024-08-21 09:05:56 +0200
  • 9474415095
    nix: add text-generation-benchmark to pure devshell (#2431) Daniël de Kok 2024-08-21 07:48:13 +0200