Commit Graph

  • a5632a6a24 add semver tags OlivierDehaene 2023-02-03 12:30:01 +0100
  • 87dfc4e2c1 increase semver OlivierDehaene 2023-02-03 12:25:20 +0100
  • a7d15c38e8 refactor doc OlivierDehaene 2023-02-03 12:03:50 +0100
  • 5de40eb078 formatting OlivierDehaene 2023-02-02 18:59:21 +0100
  • 4d00990ccd host swagger w/ github pages OlivierDehaene 2023-02-02 18:58:11 +0100
  • 109c5af615 finalize openAPI schemas OlivierDehaene 2023-02-02 18:37:07 +0100
  • 2878c43cc5 feat(router): add openAPI schemas OlivierDehaene 2023-02-02 17:31:52 +0100
  • b1482d9048 breaking(router): modify /generate API to only return generated text (#50) OlivierDehaene 2023-02-02 15:02:04 +0100
  • 8659560f7c skip santacoder tests OlivierDehaene 2023-02-02 15:01:21 +0100
  • f36e736723 breaking(router): modify /generate API to only return generated text OlivierDehaene 2023-02-01 18:38:30 +0100
  • 7b870e1e18 feat(router): use background task to manage request queue (#52) OlivierDehaene 2023-02-02 14:59:27 +0100
  • dd9f417b8a formatting OlivierDehaene 2023-02-02 14:18:25 +0100
  • e92eb15d45 rename OlivierDehaene 2023-02-02 14:17:20 +0100
  • d2d5394991 improved naming OlivierDehaene 2023-02-02 14:12:05 +0100
  • 0c93da571b improved comments OlivierDehaene 2023-02-02 13:01:59 +0100
  • 3f963d8a00 formatting OlivierDehaene 2023-02-02 12:55:46 +0100
  • 9f45182cfd fix tests OlivierDehaene 2023-02-02 12:55:32 +0100
  • c863f05cfd feat(router): rework db to use a background task OlivierDehaene 2023-02-02 12:54:56 +0100
  • df227ac20d fix(server): allow greedy repetition penalty (#51) OlivierDehaene 2023-02-02 10:34:35 +0100
  • f81851c202 fix(server): allow greedy repetition penalty OlivierDehaene 2023-02-02 10:34:08 +0100
  • 775115e3a5 feat(server): allow the server to use a local weight cache (#49) OlivierDehaene 2023-02-01 16:22:10 +0100
  • 4293e48083 feat(server): allow the server to use a local weight cache OlivierDehaene 2023-02-01 16:21:25 +0100
  • 313194f6d7 feat(server): support repetition penalty (#47) OlivierDehaene 2023-02-01 15:58:42 +0100
  • 651403c325 formatting OlivierDehaene 2023-02-01 15:30:37 +0100
  • c25fd1e2e8 fix all_input_ids shape OlivierDehaene 2023-02-01 15:30:09 +0100
  • 2ad895a6cc feat(server): allow gpt-neox models with odd vocab sizes to be sharded (#48) OlivierDehaene 2023-02-01 14:43:59 +0100
  • 3149317fa1 formatting OlivierDehaene 2023-02-01 11:48:18 +0100
  • 1d0fa38cb8 feat(server): allow gpt-neox models with odd vocab sizes to be sharded OlivierDehaene 2023-02-01 11:47:32 +0100
  • 404ed7a1f6 feat(ci): Docker build and push (#46) OlivierDehaene 2023-01-31 20:14:05 +0100
  • 04f3b1c93e add caching OlivierDehaene 2023-01-31 20:06:00 +0100
  • 34fc1e5cc6 feat(server): support repetition penalty OlivierDehaene 2023-01-31 20:03:18 +0100
  • a2aeec9331 feat(ci): Docker build and push OlivierDehaene 2023-01-31 19:13:39 +0100
  • f830706b21 feat(server): Support GPT-Neox (#39) OlivierDehaene 2023-01-31 18:53:56 +0100
  • b4455b241b update readme OlivierDehaene 2023-01-31 18:38:31 +0100
  • 7df81c34db patch quantization OlivierDehaene 2023-01-31 18:34:47 +0100
  • ffccb7f9ce feat(server): Support GPT-Neox OlivierDehaene 2023-01-30 20:51:48 +0100
  • c6e8b9442b fix(server): fix quantization for sharded models (#45) OlivierDehaene 2023-01-31 17:40:38 +0100
  • 4858f122db formatting OlivierDehaene 2023-01-31 17:38:12 +0100
  • ca11e9e8c3 fix(server): fix quantization for sharded models OlivierDehaene 2023-01-31 17:37:50 +0100
  • 017a2a8c2f feat: Add token streaming using ServerSideEvents support (#41) OlivierDehaene 2023-01-31 17:04:00 +0100
  • 41767b651f use u32 OlivierDehaene 2023-01-31 16:51:32 +0100
  • d5ab76cdfb use Rust type system to validate logic OlivierDehaene 2023-01-31 16:47:06 +0100
  • 614a1a7202 modify integration tests OlivierDehaene 2023-01-30 16:32:44 +0100
  • f8e230f65c formatting OlivierDehaene 2023-01-30 16:17:32 +0100
  • 6d024e5708 support seeding OlivierDehaene 2023-01-30 16:16:58 +0100
  • 5ef1336997 docstring OlivierDehaene 2023-01-30 12:36:04 +0100
  • 42cdb734a5 working python tests OlivierDehaene 2023-01-30 12:18:53 +0100
  • 4a538cfa49 working integration tests OlivierDehaene 2023-01-30 11:37:36 +0100
  • 429155a26a Improved version OlivierDehaene 2023-01-30 10:55:54 +0100
  • 122c137b56 rust code cleanup OlivierDehaene 2023-01-28 09:31:37 +0100
  • 48d095733a black OlivierDehaene 2023-01-27 19:52:14 +0100
  • 432566d931 wip OlivierDehaene 2023-01-27 19:46:58 +0100
  • 54fec93193 fix(server): fix seeding with multiple shards (#44) OlivierDehaene 2023-01-31 16:01:15 +0100
  • 18b0923d01 fix(server): fix seeding with multiple shards OlivierDehaene 2023-01-31 16:00:17 +0100
  • 03bdf18290 fix(server): fix seeding on gpu (#42) OlivierDehaene 2023-01-31 14:30:33 +0100
  • 28e5cbada5 fix(server): fix seeding on gpu OlivierDehaene 2023-01-31 14:29:58 +0100
  • 4f9ac67cfa Revert "feat: Add token streaming using ServerSideEvents support" (#40) OlivierDehaene 2023-01-31 14:21:51 +0100
  • 0e543e167e Revert "feat: Add token streaming using ServerSideEvents support (#36)" OlivierDehaene 2023-01-31 14:21:13 +0100
  • 7fbfbb0dc5 feat: Add token streaming using ServerSideEvents support (#36) OlivierDehaene 2023-01-31 11:49:43 +0100
  • 1c10776cde modify integration tests OlivierDehaene 2023-01-30 16:32:44 +0100
  • 3fc811f596 formatting OlivierDehaene 2023-01-30 16:17:32 +0100
  • 7d633582e4 support seeding OlivierDehaene 2023-01-30 16:16:58 +0100
  • ab2f784f29 docstring OlivierDehaene 2023-01-30 12:36:04 +0100
  • adf80bc23d working python tests OlivierDehaene 2023-01-30 12:18:53 +0100
  • b2a468176d working integration tests OlivierDehaene 2023-01-30 11:37:36 +0100
  • 046801278e Improved version OlivierDehaene 2023-01-30 10:55:54 +0100
  • 0b34905557 rust code cleanup OlivierDehaene 2023-01-28 09:31:37 +0100
  • 8c2ddfe838 black OlivierDehaene 2023-01-27 19:52:14 +0100
  • d917ae8955 wip OlivierDehaene 2023-01-27 19:46:58 +0100
  • cd298bc5e5 feat: Support sampling seeding (#37) OlivierDehaene 2023-01-30 15:36:16 +0100
  • 9285f67be5 black OlivierDehaene 2023-01-30 15:15:34 +0100
  • 93f6acc396 feat: Support sampling seeding OlivierDehaene 2023-01-30 14:36:36 +0100
  • a8ddf45c11 cleanup Yannic Kilcher 2023-01-26 14:57:49 +0100
  • 033d2174fd cleanup Yannic Kilcher 2023-01-26 14:57:39 +0100
  • d37b2d3fb9 added streaming endpoint Yannic Kilcher 2023-01-26 14:50:57 +0100
  • 1539d3cbbe feat(router): Remove second lock from batcher hot path (#27) OlivierDehaene 2023-01-26 16:29:13 +0100
  • b96fe73beb use IntMap OlivierDehaene 2023-01-26 16:06:34 +0100
  • 67ee1907fc feat(router): Remove second lock from batcher hot path OlivierDehaene 2023-01-20 14:06:33 +0100
  • ce960be0a5 feat(bloom): use torch.nn.Linear and torch.nn.GELU (#33) OlivierDehaene 2023-01-26 15:33:45 +0100
  • 6e43ef51ba feat(bloom): use torch.nn.Linear and torch.nn.GELU OlivierDehaene 2023-01-26 15:33:14 +0100
  • 9cfd41e03b cleanup Yannic Kilcher 2023-01-26 14:57:49 +0100
  • 65efd51233 cleanup Yannic Kilcher 2023-01-26 14:57:39 +0100
  • 7beb968696 Merge branch 'main' of github.com:huggingface/text-generation-inference Yannic Kilcher 2023-01-26 14:51:07 +0100
  • b1ef80583c added streaming endpoint Yannic Kilcher 2023-01-26 14:50:57 +0100
  • 13e7044ab7 fix(dockerfile): fix docker build (#32) OlivierDehaene 2023-01-24 19:52:39 +0100
  • acf45830e7 fix(dockerfile): fix docker build OlivierDehaene 2023-01-24 19:52:18 +0100
  • 5c01e2544c fix(router): fix api-inference deployment (#31) OlivierDehaene 2023-01-23 17:42:14 +0100
  • 087b4c2721 fix(router): fix api-inference deployment OlivierDehaene 2023-01-23 17:41:42 +0100
  • ab2ad91da3 fix(docker): fix api-inference deployment (#30) OlivierDehaene 2023-01-23 17:33:08 +0100
  • 507a8d5847 fix(docker): fix api-inference deployment OlivierDehaene 2023-01-23 17:32:15 +0100
  • f9d0ec376a feat(docker): Make the image compatible with api-inference (#29) OlivierDehaene 2023-01-23 17:11:27 +0100
  • c655f1cdf2 feat(docker): Make the image compatible with api-inference OlivierDehaene 2023-01-23 17:10:37 +0100
  • f31b8a7fed A small simplification and add a few more comments Nick Hill 2023-01-19 11:48:32 -0800
  • d0ccada7c0 Proposal: Use bounded queue instead of database Nick Hill 2023-01-18 12:21:12 -0800
  • 1f570d181f fix(server): Fix position ids (#28) OlivierDehaene 2023-01-20 15:35:22 +0100
  • bc18dbd980 skip santacoder tests OlivierDehaene 2023-01-20 15:34:11 +0100
  • a8d7e94d13 fix(server): Fix position ids OlivierDehaene 2023-01-20 15:33:03 +0100
  • 15511edc01 feat(server): Support SantaCoder (#26) OlivierDehaene 2023-01-20 12:24:39 +0100
  • 8d4baa14d2 feat(server): Support SantaCoder OlivierDehaene 2023-01-20 12:15:37 +0100
  • f7ac394935 fix(router): Obey max batch size (#23) Nick Hill 2023-01-17 00:11:21 -0800
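
Several commits above add and fix repetition-penalty support on the server (#47, #51), including under greedy decoding. For reference, below is a minimal sketch of the standard CTRL-style penalty as used by Hugging Face logits processors; the function name and tensor shapes are illustrative assumptions, not the repository's actual code.

```python
import torch

def apply_repetition_penalty(
    scores: torch.Tensor,      # [batch, vocab] next-token logits
    input_ids: torch.Tensor,   # [batch, seq] tokens generated so far
    penalty: float,            # > 1.0 discourages repetition; 1.0 is a no-op
) -> torch.Tensor:
    """Dampen the logits of tokens that have already been generated."""
    # Gather the current logits of every previously seen token.
    score = torch.gather(scores, 1, input_ids)
    # Divide positive logits and multiply negative ones so the penalty
    # always lowers the probability of repeated tokens. Applied before
    # argmax, the same transform also covers the greedy case.
    score = torch.where(score < 0, score * penalty, score / penalty)
    return scores.scatter(1, input_ids, score)
```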
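
The sampling-seeding commits (#37) and the follow-up fixes for GPU and multi-shard seeding (#42, #44) revolve around making each request's random draw deterministic. A minimal sketch, assuming a per-request torch.Generator created on the sampling device; the helper name and signature are hypothetical.

```python
from typing import Optional

import torch

def sample_next_token(
    probs: torch.Tensor,        # [batch, vocab] next-token probabilities
    seed: Optional[int],
    device: torch.device,
) -> torch.Tensor:
    """Draw one token per sequence, reproducibly when a seed is given."""
    generator = None
    if seed is not None:
        # The generator must live on the same device as `probs`; a CPU
        # generator paired with CUDA tensors is the kind of mismatch the
        # "fix seeding on gpu" commit above hints at.
        generator = torch.Generator(device=device)
        generator.manual_seed(seed)
    return torch.multinomial(probs, num_samples=1, generator=generator)
```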
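
For "fix(server): Fix position ids" (#28), a common pitfall with batched, left-padded inputs is deriving position ids from the attention mask rather than assuming positions start at zero. A sketch of that common pattern, with an assumed helper name; it may not reflect exactly what the commit changed.

```python
import torch

def position_ids_from_mask(attention_mask: torch.Tensor) -> torch.Tensor:
    """Compute position ids for a left-padded batch from its attention mask."""
    # The cumulative sum of the mask numbers the real tokens 0, 1, 2, ...
    # regardless of how much padding precedes them.
    position_ids = attention_mask.long().cumsum(-1) - 1
    # Padding positions receive a harmless placeholder id; they are
    # masked out of attention anyway.
    position_ids.masked_fill_(attention_mask == 0, 1)
    return position_ids
```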