text-generation-inference

huggingface/text-generation-inference

Fork 0

mirror of https://github.com/huggingface/text-generation-inference.git synced 2025-09-10 20:04:52 +00:00

Commit Graph

Select branches

Hide Pull Requests

20250708-ci-fixes

add-chat-response-format

add-google-cloud-provider

add-quickstart-script

add-rotary-embed-tests

add-small-ttft-script

add-test-for-warmup-and-kvcache

add_L4

add_api_key

add_batch_dimension

add_chunked_atn

add_chunked_attn

add_deepseekv3

add_gptq_docs

add_integration_test

add_readme_dashboard

add_tunable_prefill

add_vlm_chunking

adding_docs

adjust-mllama-test-output

adjust-where-request-max-tokens-is-defaulted

aiter_kernels

amd-ci-fx

auto_length

automodel-supports-flash-paged-attention

avoid-cuda-graph-during-warmup-if-oom

avoid-zero-seed

backends/trtllm

backends/trtllm-executor

baichuan2-13b

bnb4

bugfix/add_tools_prompt

bugfix/moe-kernels-imports

bugfix/phi-exl2

bump-client-0.6.2

bump-kernel-versions

bump-poetry-and-requirements

chunked_attn_l4

ci-amihalik-update-chat-completion-messages

ci-new-cluster

ci-patch

ci-run-openai-function-calling-compatible-support

ci-update_xpu_image

ci-xpu

ci-xpu2

ci2

ci_amd

ci_amd2

ci_amd3

ci_amd4

close_dl_thread

compat_logger

cuda_ipc_allreduce

debug-gpt2

debug-request-id

debug-torch-23

debug/gemma2

debugging-timeouts

deploy/aml

dev

development-guide

dummy

enable-non-grammar-constrained-tools

enable-qwen2vl-video

enable-transformers-vlm

enable_non_divisible_embeddings

exl2

experiment/moe

explore-static-triton-kernels

explore-t4-gemma-issues

feat-backend-llamacpp

feat/add-load-test

feat/attention_sinks

feat/backend_abstraction

feat/backend_feature

feat/better_tokens

feat/cuda_12

feat/flash_decoding

feat/improve_max_tokens

feat/max_queue_size

feat/page_re_alloc

feat/parse_logs

feat/support_deepspeed

feature/machete

feature/moe-kernels

feature/no_repeat_ngram_size

feature/no_repeat_ngram_size_ci

feature/phi-3-small

feature/prefix

feature/radix-prefix-cache

feature/radix-prefix-cache-bench

feature/vlm-prefix-caching

fix-cudagraph-bug

fix-gemma-tokenization

fix-grammar-cleanup-bug

fix-grammar-fsm-batching

fix-mixtral-adapter-loading

fix-release-tests

fix-repack-for-marlin

fix-tool-call-def

fix-tp

fix-version-install

fix/allow-top-p-0

fix/avoid_record_streams

fix/op-trace-id

fix/parse-mamba-config

fix_default_arg

fix_exl2

fix_fp8_llama3.2

fix_leak

fix_mistral2

fix_neox_rotary_emb

fix_phi3

flashinfer

flashinfer-0.2.5

fp8_kvcache

fp8_rocm

gaudi_llama4_tmp

gha_sccache_use_secrets

git_2.0.4

git_3.1.1

git_3.2.0

git_3.2.1

git_3.3.0

git_3.3.1

git_3.3.2

git_v2.1.0

git_v2.1.1

git_v2.2.0

git_v2.3.0

git_v2.3.1

git_v2.4.0

git_v2.4.1

git_v3.0.0

git_v3.0.1

git_v3.0.2

git_v3.1.0

git_v3.2.2

git_v3.2.3

git_v3.3.3

git_v3.3.4

git_v3.3.5

improve-docs

improve-dynamic-message-content

improve-json-schema-field

improve-tool-call-and-response-ids

improve_defaults

improve_launcher_defaults

inlcude-latest-release-on-commit-builds-tags

ipex-moe

kvrouter

kvrouter-endpoints

llama-fused-compiled-mlp

main

maintenance/docker-network

maintenance/merge-vlm-input-prep

mamba2

martinigoyanes-fix-frequency-penalty

medusa

megatron

message-more-info

mi300-temp

mllama

model_compat_log

more_logs

multi-lora

new_minor_version

nix/cargo-clippy

nix/docker2

nix/pytorch-2.5.1

nix_integration_tests

nix_test2

no_root_user

no_root_user2

op-compilation-benchmarking

origin/slind_window_fix

osanseviero-patch-1

patch_version_3.3.6

pip-installable

pr-1869-ci-run

pr-2076-ci-run

pr-2290-ci-runner

pr-2366-ci-branch

pr-2444-ci-branch

pr-2517-ci-branch

pr-2711-ci-branch

pr-2784-ci-branch

pr-2840-ci-branch

pr-2954-ci-branch

pr-3002-ci-branch

pr-3004-ci-branch

pr-3018-ci-branch

precompile-kernels-workflow

prefix_chunk

prefix_default

proxy_sse_engine_state

quantization

quantization-0.1

refactor-lora-linear

release-3.2.4

remove_post_load_weights

response-header-metrics

revert

rocm-ci-build

rocm_6.2_fixes

router-grammar-compile

s3-cache

self-generating-docs

set-num-blocks

simpler_exllama

skip-mistral-test

speculative

streaming_conceptual

support-granite-vision

support-logit-bias-in-chat

support-phi-model

support-phi3-small

support-pre-compile-kernels

temp_work

test-batch-speedup-amount

test_docs

test_rocm

tmp_invariants

tmp_medusa

tmp_torch_compile

transformers-ci

triton_fix

trtllm-stop-words

trtllm/executor_stats

tuna

update-jsonschema

update_docs2

update_internal_version

update_peft

update_readme

upgrade-outlines

upgrade_mlp_speculator

use_g6

use_updated_kernels

vllm/setup

zstd

#1

#100

#101

#1010

#1018

#1019

#102

#1022

#1023

#1024

#103

#1033

#1034

#1042

#1044

#1045

#1048

#1049

#1052

#1054

#1058

#1059

#106

#1060

#1061

#1063

#1064

#1065

#1066

#1068

#107

#1070

#1071

#1075

#1076

#1077

#108

#1080

#1081

#1089

#109

#1090

#1091

#1092

#1094

#1096

#1097

#1099

#11

#110

#1100

#1101

#1102

#1103

#1105

#1110

#1112

#1116

#1123

#1128

#1134

#114

#1140

#1141

#115

#1153

#1155

#116

#1165

#1165

#117

#1173

#1176

#1178

#1179

#118

#1182

#1183

#1184

#1187

#119

#1198

#1202

#1211

#1214

#1219

#122

#1224

#1228

#123

#1239

#1241

#1242

#1243

#1246

#1252

#126

#1260

#1267

#1270

#1272

#1274

#1276

#1279

#128

#1285

#1287

#129

#1294

#1295

#13

#130

#1301

#1305

#1307

#1308

#1313

#132

#1326

#1328

#133

#1336

#1337

#134

#1341

#1343

#1346

#1347

#1348

#135

#1351

#1352

#1353

#1358

#136

#1361

#1364

#137

#1370

#1373

#138

#1381

#1386

#139

#1390

#1395

#14

#140

#1408

#141

#1414

#1419

#142

#1420

#1424

#1425

#1427

#1428

#143

#1436

#144

#1442

#1448

#145

#1450

#1453

#1454

#1455

#1459

#1461

#1462

#1463

#1469

#147

#1470

#1471

#1473

#1475

#1476

#1477

#1478

#148

#1480

#1484

#1486

#1488

#1489

#149

#1490

#1491

#1492

#1494

#1495

#1496

#1497

#1498

#15

#150

#1502

#1504

#1505

#1506

#151

#1511

#1512

#1514

#1515

#1516

#1517

#1518

#152

#1520

#1523

#1524

#1526

#1527

#153

#1532

#1533

#1537

#1539

#154

#1540

#1541

#1542

#1543

#155

#1550

#1552

#1555

#1556

#1557

#1560

#1563

#1564

#1567

#1568

#1569

#1570

#1571

#1576

#1577

#1578

#1579

#1580

#1583

#1584

#1585

#1586

#1587

#1588

#159

#1591

#1592

#1594

#16

#160

#1603

#1605

#1606

#1607

#1608

#1609

#161

#1610

#1614

#1617

#1618

#1619

#162

#1621

#1626

#1628

#163

#1632

#1637

#1638

#1639

#164

#1646

#1648

#1650

#1651

#1653

#1658

#1660

#1662

#1663

#1664

#1666

#1667

#1668

#167

#1676

#168

#1682

#1685

#1686

#1693

#1697

#1698

#17

#170

#1702

#1703

#1704

#1707

#1708

#1709

#1710

#1713

#1714

#1715

#1716

#1718

#1719

#1726

#1727

#1729

#173

#1730

#1731

#1734

#1735

#1736

#1737

#1739

#174

#1740

#1747

#1748

#1749

#175

#1751

#1755

#1756

#1758

#1759

#1760

#1764

#1765

#1767

#1768

#1773

#1774

#1775

#178

#1784

#1789

#179

#1790

#1791

#1797

#1798

#18

#180

#1800

#1801

#1808

#181

#1811

#1812

#1813

#1815

#1816

#1817

#1818

#1820

#1825

#1827

#1828

#1829

#183

#1830

#1832

#1833

#1835

#1836

#1839

#184

#1840

#1841

#1843

#1844

#1845

#1848

#1849

#185

#1850

#1851

#1854

#1855

#186

#1860

#1865

#1866

#1869

#187

#1882

#1884

#1886

#1888

#1889

#1890

#1892

#1894

#1895

#1898

#19

#190

#1902

#1906

#1908

#1909

#191

#1910

#1912

#1915

#1916

#1917

#1918

#1919

#1920

#1921

#1923

#1924

#1925

#1929

#193

#1931

#1932

#1934

#1935

#1936

#1937

#1938

#1939

#194

#1940

#1942

#1947

#1948

#1949

#1950

#1951

#1953

#1954

#1958

#1959

#196

#1963

#1965

#1967

#1970

#1971

#1975

#1980

#1981

#1985

#1986

#1988

#1989

#1990

#1994

#1995

#1996

#2

#20

#200

#2002

#2003

#2004

#2005

#2006

#2008

#201

#2010

#2011

#2013

#2014

#2015

#2016

#2017

#202

#2020

#2021

#2022

#2023

#2024

#2028

#203

#2031

#2032

#2033

#2034

#2038

#2039

#2044

#2045

#2046

#2047

#2049

#205

#2050

#2052

#2054

#2059

#2060

#2061

#2062

#2063

#2065

#2066

#2068

#207

#2071

#2072

#2074

#2075

#2076

#2078

#2079

#208

#2080

#2084

#2085

#2086

#2087

#2088

#2089

#2091

#2092

#2094

#2095

#2097

#2099

#210

#2101

#2102

#2103

#2104

#2105

#2109

#2110

#2111

#2114

#2115

#2116

#2117

#2118

#2119

#212

#2120

#2123

#2124

#2125

#2127

#2128

#2129

#213

#2131

#2132

#2133

#2134

#2135

#2137

#2138

#214

#2140

#2141

#2142

#2148

#2149

#215

#2151

#2152

#2153

#2155

#2156

#2158

#216

#2161

#2163

#2164

#2165

#2166

#2167

#2168

#2169

#217

#2170

#2173

#2175

#2176

#2178

#2179

#218

#2180

#2181

#2182

#2186

#2187

#2189

#219

#2190

#2191

#2193

#2194

#2196

#22

#220

#2201

#2202

#2203

#2204

#2205

#2208

#2209

#221

#2212

#2213

#2215

#2216

#2217

#222

#2220

#2221

#2224

#2225

#2226

#2228

#2230

#2231

#2232

#2233

#2237

#2242

#2243

#2244

#2245

#2248

#2249

#2250

#2251

#2254

#2255

#2256

#2257

#2258

#2259

#226

#2260

#2261

#2262

#2266

#2268

#2269

#227

#2271

#2272

#2273

#2274

#2276

#2277

#2278

#2279

#228

#2281

#2282

#2283

#2284

#2285

#2286

#2287

#2288

#2289

#2290

#2291

#2292

#2298

#2299

#23

#2300

#2303

#2304

#2306

#2307

#2308

#2309

#2311

#2311

#2313

#2314

#2315

#2317

#2320

#2323

#2325

#2326

#2327

#2328

#2329

#233

#2330

#2331

#2333

#2335

#2336

#2337

#2338

#2339

#234

#2341

#2342

#2343

#2344

#2345

#2346

#2347

#235

#2350

#2351

#2352

#2353

#2354

#2357

#2358

#2359

#2360

#2361

#2364

#2365

#2366

#2367

#2368

#237

#2370

#2371

#2372

#2374

#2377

#2378

#2379

#2381

#2382

#2384

#2385

#2386

#2387

#2389

#2390

#2391

#2392

#2394

#2395

#2396

#2397

#2398

#2399

#24

#2401

#2402

#2403

#2404

#2405

#2406

#2407

#2408

#2410

#2411

#2412

#2414

#2415

#2416

#2419

#242

#2420

#2422

#2423

#2424

#2426

#2427

#2428

#2429

#2430

#2431

#2433

#2437

#2438

#2439

#244

#2442

#2443

#2444

#2449

#2450

#2451

#2453

#2454

#2455

#2459

#246

#2462

#2463

#2468

#2469

#2470

#2471

#2472

#2473

#2477

#2478

#2479

#248

#2481

#2482

#2484

#2486

#2489

#2490

#2491

#2492

#2493

#2494

#2496

#2497

#2498

#2499

#25

#250

#2500

#2501

#2507

#251

#2510

#2511

#2512

#2513

#2514

#2515

#2516

#2517

#2518

#2519

#252

#2520

#2521

#2524

#2525

#2527

#2528

#2529

#2532

#2533

#2535

#2536

#2537

#2538

#2539

#2540

#2545

#2546

#2547

#2548

#255

#2550

#2551

#2552

#2553

#2554

#2555

#2556

#2557

#2558

#2561

#2562

#2563

#2566

#2567

#2568

#257

#2574

#2575

#2577

#2578

#2579

#258

#2580

#2582

#2585

#2586

#2587

#2588

#2589

#259

#2590

#2591

#2592

#2594

#2595

#2596

#2597

#2599

#26

#2600

#2601

#2602

#2603

#2604

#2605

#2606

#2607

#2609

#261

#2610

#2611

#2612

#2614

#2616

#2617

#2619

#262

#2620

#2622

#2623

#2625

#2627

#2628

#2629

#2630

#2631

#2632

#2633

#2634

#2637

#264

#2640

#2642

#2642

#2645

#2646

#2647

#2648

#2650

#2651

#2652

#2655

#2658

#2659

#266

#2661

#2663

#2664

#2665

#2666

#2668

#267

#2673

#2674

#2677

#2678

#2680

#2682

#2683

#2684

#2685

#2686

#2687

#2688

#2689

#269

#2690

#2691

#2692

#2693

#2694

#2695

#2697

#2698

#2699

#27

#2701

#2702

#2704

#2706

#2707

#2708

#2709

#2710

#2711

#2712

#2713

#2714

#2716

#2717

#2718

#2719

#272

#2720

#2721

#2723

#2724

#2725

#2726

#2732

#2733

#2738

#274

#2740

#2741

#2742

#2743

#2745

#2746

#275

#2750

#2751

#2753

#2754

#2755

#2756

#2756

#2758

#276

#2760

#2761

#2762

#2764

#2765

#2766

#2767

#2768

#2769

#277

#2770

#2771

#2772

#2773

#2774

#2777

#2778

#2779

#278

#2782

#2784

#2785

#2786

#2788

#2789

#2790

#2791

#2792

#2793

#2795

#2796

#2797

#2798

#2799

#28

#2802

#2803

#2806

#2807

#2808

#2809

#2810

#2811

#2812

#2814

#2815

#2816

#2817

#2818

#282

#2824

#2825

#2826

#2827

#2829

#2831

#2833

#2834

#2837

#284

#2840

#2841

#2845

#2846

#2848

#2848

#2849

#285

#2850

#2852

#2855

#2856

#2858

#2859

#286

#2861

#2862

#2863

#2866

#2866

#287

#2870

#2874

#2874

#2878

#2882

#2883

#2884

#2885

#2886

#2891

#2892

#2893

#2898

#29

#2901

#2903

#2904

#2905

#2907

#2908

#2910

#2911

#2912

#2913

#2916

#2917

#2918

#2919

#292

#2921

#2922

#2924

#2928

#2929

#2931

#2932

#2933

#2935

#2936

#2937

#2938

#294

#2940

#2941

#2942

#2943

#2945

#2947

#2948

#2949

#2950

#2951

#2953

#2954

#2957

#2958

#2960

#2961

#2962

#2963

#2964

#2964

#2965

#2965

#2966

#2967

#2968

#297

#2970

#2971

#2972

#2975

#2976

#2976

#2977

#2979

#298

#2980

#2981

#2982

#2983

#2988

#299

#2990

#2991

#2992

#2993

#2995

#2996

#2998

#2999

#30

#3000

#3001

#3002

#3003

#3004

#3006

#3007

#3008

#3010

#3012

#3012

#3013

#3015

#3016

#3017

#3018

#302

#3021

#3022

#3024

#3027

#3028

#3029

#303

#3030

#3031

#3032

#3033

#3034

#3034

#3035

#3036

#3037

#3039

#304

#3040

#3041

#3042

#3044

#3045

#3046

#3047

#3048

#3049

#305

#3051

#3053

#3054

#3055

#3056

#3057

#3060

#3061

#3063

#3063

#3064

#3064

#3065

#3065

#3067

#3068

#3069

#3072

#3073

#3074

#3075

#3076

#3077

#3078

#3079

#3079

#308

#3080

#3081

#3083

#3084

#3085

#3086

#3089

#3091

#3092

#3093

#3094

#3095

#3096

#3098

#3099

#31

#310

#3100

#3101

#3103

#3106

#3107

#3108

#3109

#3111

#3111

#3112

#3113

#3116

#3117

#3118

#3120

#3121

#3122

#3124

#3125

#3126

#3127

#3128

#3129

#313

#3131

#3132

#3134

#3136

#3141

#3142

#3144

#3145

#3147

#3148

#3150

#3151

#3152

#3154

#3154

#3156

#3157

#3160

#3161

#3162

#3162

#3164

#3164

#3166

#3166

#3167

#317

#3170

#3171

#3172

#3174

#3176

#3179

#318

#3182

#3184

#3186

#3186

#3187

#3188

#3189

#3193

#3194

#3194

#3196

#3197

#3197

#32

#3200

#3201

#3201

#3202

#3204

#3206

#3207

#3210

#3210

#3211

#3217

#3218

#3220

#3221

#3222

#3223

#3224

#3229

#3230

#3231

#3231

#3232

#3234

#3235

#3236

#3237

#3238

#3239

#3240

#3242

#3244

#3245

#3246

#3249

#325

#3252

#3253

#3254

#3255

#3255

#3256

#3258

#3260

#3261

#3262

#3263

#3264

#3265

#3266

#3266

#3267

#3268

#3269

#327

#3270

#3271

#3273

#3274

#3276

#3276

#328

#3280

#3281

#3282

#3283

#3284

#3286

#3287

#3288

#3288

#329

#3291

#3291

#3292

#3292

#3297

#3298

#33

#3300

#3300

#3302

#3302

#3308

#3310

#3310

#3311

#3312

#3313

#3314

#3315

#3315

#3319

#3322

#3323

#3324

#3325

#3326

#3327

#334

#335

#336

#34

#340

#341

#343

#344

#348

#35

#351

#352

#353

#356

#357

#358

#359

#36

#360

#362

#363

#364

#367

#368

#37

#370

#373

#379

#384

#385

#388

#39

#393

#394

#395

#396

#4

#40

#400

#404

#406

#407

#41

#411

#412

#42

#434

#438

#44

#441

#443

#45

#453

#46

#462

#465

#47

#470

#472

#475

#477

#48

#480

#483

#485

#488

#49

#498

#5

#50

#501

#502

#51

#513

#514

#516

#519

#52

#520

#521

#522

#525

#529

#53

#534

#54

#543

#544

#545

#55

#550

#553

#557

#558

#56

#561

#562

#567

#57

#575

#578

#579

#58

#580

#581

#582

#583

#585

#586

#587

#588

#59

#590

#595

#596

#6

#60

#600

#605

#608

#609

#61

#611

#616

#617

#618

#619

#62

#621

#623

#624

#626

#63

#630

#633

#634

#635

#639

#64

#642

#643

#647

#648

#659

#66

#661

#664

#665

#666

#67

#670

#671

#678

#68

#684

#689

#698

#7

#70

#704

#708

#71

#712

#713

#715

#719

#72

#721

#723

#725

#727

#73

#733

#737

#738

#740

#741

#743

#745

#746

#748

#75

#750

#76

#761

#762

#767

#768

#770

#773

#783

#785

#789

#791

#793

#794

#795

#797

#798

#799

#8

#803

#805

#806

#809

#810

#812

#82

#820

#821

#822

#823

#829

#831

#836

#838

#84

#842

#848

#85

#851

#852

#853

#854

#858

#86

#860

#862

#867

#868

#87

#872

#88

#881

#884

#886

#889

#89

#892

#893

#898

#9

#90

#900

#901

#905

#906

#91

#910

#911

#918

#921

#93

#930

#932

#935

#94

#941

#947

#95

#950

#951

#953

#954

#957

#958

#96

#963

#964

#966

#968

#97

#971

#977

#981

#986

#989

#990

#993

#994

#999

v0.2.0

v0.2.1

v0.3.0

v0.3.1

v0.3.2

v0.4.0

v0.4.1

v0.4.2

v0.4.3

v0.5.0

v0.6.0

v0.7.0

v0.8.0

v0.8.1

v0.8.2

v0.9.0

v0.9.1

v0.9.2

v0.9.3

v0.9.4

v1.0.0

v1.0.1

v1.0.2

v1.0.3

v1.1.0

v1.1.1

v1.2.0

v1.3.0

v1.3.1

v1.3.2

v1.3.3

v1.3.4

v1.4.0

v1.4.1

v1.4.2

v1.4.3

v1.4.4

v1.4.5

v2.0.0

v2.0.1

v2.0.2

v2.0.3

v2.0.4

v2.1.0

v2.1.1

v2.2.0

v2.3.0

v2.3.1

v2.4.0

v2.4.1

v3.0.0

v3.0.1

v3.0.2

v3.1.0

v3.1.1

v3.2.0

v3.2.1

v3.2.2

v3.2.3

v3.3.0

v3.3.1

v3.3.2

v3.3.3

v3.3.4

v3.3.5

27ecef5153 Using kernel like Makefile instead. Nicolas Patry 2023-09-25 08:56:13 +0000
757cf1783d Declare torch as build dep. Nicolas Patry 2023-09-25 08:24:34 +0000
e08f3ac555 Add git to docker. Nicolas Patry 2023-09-25 08:17:49 +0000
8ee9307618 Finishing nits + integration test Nicolas Patry 2023-09-25 10:07:45 +0200
c35f39cf83

Add AWQ quantization inference support (#1019) dev Abhinav M Kulkarni 2023-09-25 13:28:02 +0530
649d9754b1 fix discard_names in safetensors convertion zhangsibo1129 2023-09-25 10:43:28 +0800
de5098e013 Added AWQ in REDME Abhinav Kulkarni 2023-09-23 10:36:44 +0000
054930fbbe Minor refactor Abhinav Kulkarni 2023-09-23 10:12:26 +0000
5d0973f484 Refactored WQLinear Abhinav Kulkarni 2023-09-23 09:48:27 +0000
ba22ef54d4 pass max_total_tokens info through warmup, python could get max_total_tokens as truncate+max_new_tokens in warmup Wang, Yi A 2023-09-21 20:50:50 -0700
dac2348ab0 added my fork fix Merve Noyan 2023-09-21 12:57:15 +0200
a44a82b1b6 added my fork Merve Noyan 2023-09-21 12:56:40 +0200
0c77c75ad0 added my fork Merve Noyan 2023-09-21 12:55:12 +0200
a9753a5b70 removed main.rs change condition to test Merve Noyan 2023-09-21 12:51:55 +0200
c6590fd1bb added git push back Merve Noyan 2023-09-21 12:47:57 +0200
94065cd81a added print and removed launcher.md to test Merve Noyan 2023-09-21 12:46:10 +0200
e40b942389 added if merged Merve Noyan 2023-09-21 12:43:45 +0200
7206fe3163 changed actions since it runs on repo level, added cargo build Merve Noyan 2023-09-21 12:23:52 +0200
2bddc78dca fixes Merve Noyan 2023-09-21 12:17:47 +0200
86736cb93a added codeblock, include removal of md file Merve Noyan 2023-09-21 12:06:15 +0200
4cab978343 removed GH token env var Merve Noyan 2023-09-21 11:47:26 +0200
1da6e241cf minor fix and removed credentials Merve Noyan 2023-09-21 11:46:06 +0200
c2eaa28e4b autodocs Merve Noyan 2023-09-21 11:41:57 +0200
e19d0e7867 Fixing t5 loading. Nicolas Patry 2023-09-21 08:29:48 +0200
123749a3c9

Fix missing arguments in Galactica's from_pb (#1022) Vincent Brouwers 2023-09-21 08:15:59 +0200
eeaa22ab04

enable bfloat16 for cpu (#1034) Wang, Yi 2023-09-19 23:19:28 +0800
c44fce6c09 enable bfloat16 for cpu Wang, Yi A 2023-09-18 03:14:50 -0700
f85a6f853e

Merge branch 'huggingface:main' into abhinavkulkarni/add-awq-support Abhinav M Kulkarni 2023-09-18 23:39:26 +0530
c58953398a

Install curl within base image to be able to perform more advanced healthchecks Raphael 2023-09-15 17:41:34 +0200
00359fcdc5 fix indent issue bangoz 2023-09-13 16:33:12 +0000
2a16b4101f Fix top_n_tokens returning non-log probs for some models Vincent Brouwers 2023-09-14 08:49:35 +0000
9b4545f279 Fix missing arguments in Galactica's from_pb Vincent Brouwers 2023-09-14 08:40:19 +0000
acb7e1d465 Added quantize_config.json support for AWQ Abhinav Kulkarni 2023-09-13 11:12:32 +0000
00dede8a63 Added AWQ support for FlashLlama models Abhinav Kulkarni 2023-09-13 11:08:22 +0000
8ce40f9d25

fix code snippet Merve Noyan 2023-09-12 16:33:35 +0200
c8a01d7591

Unsupported model serving docs (#906) Merve Noyan 2023-09-12 15:55:14 +0200
e9ae678699

Quantization docs (#911) Merve Noyan 2023-09-12 15:52:46 +0200
0966704dd6

Update docs/source/basic_tutorials/non_core_models.md Merve Noyan 2023-09-12 15:52:30 +0200
62b05f2ccf

Update docs/source/supported_models.md Merve Noyan 2023-09-12 12:27:16 +0200
6473cf852e

Merge branch 'main' into quantization_docs Merve Noyan 2023-09-12 12:12:00 +0200
1f69fb9ed4

Tensor Parallelism conceptual guide (#886) Merve Noyan 2023-09-12 12:11:20 +0200
6703fd3009

Update installation.md albertodepaola 2023-09-11 16:29:03 -0300
33958e0989 Start. speculative Nicolas Patry 2023-09-11 18:25:49 +0000
4cce84301b

fit for baichuan models (#981) xiaobin 2023-09-08 22:51:34 +0800
e349f57d10 Update solution to account for GPTQ. Nicolas Patry 2023-09-08 14:36:49 +0000
0357de7022

Merge branch 'main' into tp-docs Merve Noyan 2023-09-08 14:20:00 +0200
9381797626

Merge branch 'main' into quantization_docs Merve Noyan 2023-09-08 15:19:11 +0300
30a93a0dec

Paged Attention Conceptual Guide (#901) Merve Noyan 2023-09-08 15:18:42 +0300
704cd18402

Iterated on Pedro's comments Merve Noyan 2023-09-08 13:01:58 +0200
e82259106c

Update docs/source/conceptual/quantization.md Merve Noyan 2023-09-08 12:55:45 +0200
6cb066eb01 raise exception on invalid images Leo Tronchon 2023-09-08 12:35:13 +0200
8acd649c56

Merge branch 'main' into quantization_docs Merve Noyan 2023-09-07 19:47:53 +0300
2faf396128

Merge branch 'main' into paged-attention-docs Merve Noyan 2023-09-07 19:47:26 +0300
53e89e7ae7

Merge branch 'main' into tp-docs Merve Noyan 2023-09-07 19:46:49 +0300
5a5b4ef954

Clarified flag Merve Noyan 2023-09-07 18:42:33 +0200
0a63e9ab68

Fix __call__ vs forward. (#993) Nicolas Patry 2023-09-07 17:36:30 +0200
7f48a61bce

Update docs/source/conceptual/quantization.md Merve Noyan 2023-09-07 16:49:33 +0200
47db26298a

Update docs/source/conceptual/quantization.md Merve Noyan 2023-09-07 16:49:22 +0200
12d9a67752

Fix inline latex Merve Noyan 2023-09-07 16:46:05 +0200
9a0a4d926c

nit Merve Noyan 2023-09-07 17:24:14 +0300
af1ed38f39

Safetensors conceptual guide (#905) Merve Noyan 2023-09-07 17:22:06 +0300
eb8f59083d

Added note on weight-cache-override Merve Noyan 2023-09-07 16:20:56 +0200
873573150f

Update non_core_models.md Merve Noyan 2023-09-07 16:06:18 +0200
07bc903d6e Fix __call__ vs forward. Nicolas Patry 2023-09-07 14:02:34 +0000
4d12840986

Update docs/source/basic_tutorials/non_core_models.md Merve Noyan 2023-09-07 15:25:54 +0200
061b6a9c21

Update docs/source/basic_tutorials/non_core_models.md Merve Noyan 2023-09-07 15:24:00 +0200
ecaa9d6f8e

Update docs/source/conceptual/tensor_parallelism.md Merve Noyan 2023-09-07 14:55:43 +0200
b23ad5d1e4

Update docs/source/conceptual/tensor_parallelism.md Merve Noyan 2023-09-07 14:54:03 +0200
099291a061

Update docs/source/conceptual/tensor_parallelism.md Merve Noyan 2023-09-07 14:53:30 +0200
0ef535e77e

Merge branch 'main' into safetensors_docs Merve Noyan 2023-09-07 15:47:55 +0300
73d4f92e0e

Merge branch 'main' into paged-attention-docs Merve Noyan 2023-09-07 15:47:15 +0300
0fcd2b4727

Update docs/source/conceptual/paged_attention.md Merve Noyan 2023-09-07 14:46:49 +0200
9973f4041c

Update docs/source/conceptual/paged_attention.md Merve Noyan 2023-09-07 14:46:39 +0200
5d27a467eb

Update docs/source/conceptual/paged_attention.md Merve Noyan 2023-09-07 14:46:29 +0200
41cd2e350c

Update docs/source/conceptual/paged_attention.md Merve Noyan 2023-09-07 14:46:21 +0200
90930a537c

Update docs/source/_toctree.yml Merve Noyan 2023-09-07 14:46:09 +0200
5ec7b1a2af

Update docs/source/conceptual/paged_attention.md Merve Noyan 2023-09-07 14:46:03 +0200
b03d2621a7

add transformers gptq support (#963) Florian Zimmermeister 2023-09-07 10:19:42 +0200
935a77fb74

Fix exllama wronfully loading (#990) Maxime Laboissonnière 2023-09-07 03:17:22 -0400
9f9cb924e0 Merge branch 'fix_exllama_wronfully_loading' of github.com:maximelaboisson/text-generation-inference into fix_exllama_wronfully_loading Maxime Laboissonniere 2023-09-06 20:02:32 -0400
afe9c07476 fixing condition Maxime Laboissonniere 2023-09-06 20:02:23 -0400
06a3d19142 fixing condition Maxime Laboissonniere 2023-09-06 19:41:42 -0400
7c8f0a0546

Merge branch 'main' into remove_readme Omar Sanseviero 2023-09-06 22:22:00 +0200
a9fdfb2464

docs: Remove redundant content from stream guide (#884) Omar Sanseviero 2023-09-06 18:42:42 +0200
433cc0f4d9

Update README.md Omar Sanseviero 2023-09-06 16:56:40 +0200
4a21912edf

Update README.md Omar Sanseviero 2023-09-06 16:48:56 +0200
915f2e909c

Update docs/source/conceptual/streaming.md Omar Sanseviero 2023-09-06 16:43:21 +0200
f260eb72f9

docs: Flash Attention Conceptual Guide (#892) Merve Noyan 2023-09-06 16:36:49 +0300
059bb5cf83

chore: sync text-generation version from 0.3.0 to 0.6.0 with pyproject.toml (#950) 王佳欣 2023-09-06 21:20:32 +0800
211e7b7e35

Disabling exllama on old compute. (#986) Nicolas Patry 2023-09-06 15:01:00 +0200
3ed4c0f33f

docs: typo in streaming.js (#971) Julien Bouquillon 2023-09-06 14:57:59 +0200
14bbd311c1 Dummy workaround for CPU. Nicolas Patry 2023-09-06 14:35:02 +0200
1987d37603 Disabling exllama on old compute. Nicolas Patry 2023-09-06 14:20:03 +0200
c8bbbd8129

chore(client): Support Pydantic 2 (#900) Jelle Zijlstra 2023-09-06 05:12:08 -0700
47fbf4495e

Merge branch 'huggingface:main' into main Marcus Dunn 2023-09-05 13:24:20 -0700
2a1f306e26 fit for baichuan models xiaoyuze 2023-09-05 15:57:32 +0800
033230ae66

Backport https://github.com/vllm-project/vllm/pull/936 (#977) Nicolas Patry 2023-09-04 15:00:19 +0200
9aaa184675 Going bacjk on Olivier fork. Nicolas Patry 2023-09-04 14:15:39 +0200
e181a3a761 Backport https://github.com/vllm-project/vllm/pull/936 Nicolas Patry 2023-09-04 12:29:39 +0200
1700d11905 updated rsnm2 2023-09-01 18:29:36 +0000