Commit Graph

  • e30f4f61e7 Only return top_tokens field when requested Vincent Brouwers 2023-08-02 13:03:19 +0000
  • 65c7b6207c Add max_top_n_tokens CLI argument Vincent Brouwers 2023-08-02 12:42:59 +0000
  • 8471e1862d Defer building top-token objects to Rust Vincent Brouwers 2023-08-01 15:02:30 +0000
  • 730d86f1d0 Skip top-n tokens in prefill Vincent Brouwers 2023-08-01 13:55:38 +0000
  • 564bc99a7b fix toc Merve Noyan 2023-08-01 14:13:28 +0300
  • 470dcdfe7b Separated querying section and emphasized self generating docs Merve Noyan 2023-08-01 14:10:45 +0300
  • 21ca70e0eb Added supported models and hardware Merve Noyan 2023-08-01 14:02:14 +0300
  • 2675d934e5 Update local_launch.md Merve Noyan 2023-08-01 12:44:25 +0300
  • cdc7db9af9 add FastLinear import p_spozzhang 2023-08-01 13:56:52 +0800
  • a9aa187d84 Update requirements.txt Sven Schultze 2023-07-31 21:54:17 +0200
  • 7766fee9b1 fix typo for dynamic rotary (#745) compat_logger Florian Zimmermeister 2023-07-31 18:58:46 +0200
  • d3d8f1bd6b Typo fix. (#746) Nicolas Patry 2023-07-31 18:57:29 +0200
  • 8a774ab07e Update layers.py Nicolas Patry 2023-07-31 18:57:14 +0200
  • a4415e0465 fix typo for dynamic rotary Florian Zimmermeister 2023-07-31 18:55:54 +0200
  • 15fc64668f fix(server): Failing quantize config after local read. (#743) Nicolas Patry 2023-07-31 17:51:26 +0200
  • 7a2136eeb8 fix(server): Failing quantize config after local read. Nicolas Patry 2023-07-31 15:49:06 +0000
  • c86dcbeeb1 Update build_pr_documentation.yml Merve Noyan 2023-07-31 18:16:29 +0300
  • d65bbb333d Update build_pr_documentation.yml Merve Noyan 2023-07-31 18:13:32 +0300
  • b2268272ad Added installation and launch notes and re-structured toc Merve Noyan 2023-07-31 17:35:36 +0300
  • 2a13f1a046 chore: fix typo in mpt_modeling.py (#737) Ikko Eltociear Ashimine 2023-07-31 22:43:44 +0900
  • 932bdd93ff Adding Rope scaling. (#741) Nicolas Patry 2023-07-31 15:38:47 +0200
  • d16298b8d4 Allocate top_n_token tensor in Batch Vincent Brouwers 2023-07-31 13:09:45 +0000
  • 41bd0e4af1 Added index.md and other initial files Merve Noyan 2023-07-31 15:56:29 +0300
  • aa44a3d1f0 Cargo fmt. Nicolas Patry 2023-07-31 12:42:18 +0000
  • b9633c46d0 Fix typing in Model.generate_token (#733) Jae-Won Chung 2023-07-31 08:35:14 -0400
  • edbba4ea36 Adding Rope scaling. Nicolas Patry 2023-07-31 11:55:44 +0000
  • dc631b5be5 Setup for doc-builder and added TOC Merve Noyan 2023-07-31 14:18:20 +0300
  • 92bb56b0c1 Local gptq support. (#738) Nicolas Patry 2023-07-31 10:32:52 +0200
  • 760fdcfe7b Upgrading rust version. Nicolas Patry 2023-07-31 10:08:15 +0200
  • 66cea49d57 Cargo fmt Nicolas Patry 2023-07-31 09:57:18 +0200
  • 4b3e24f843 feat(server): Add bitsandbytes 4bit quantization (#626) krzim 2023-07-21 03:53:05 -0400
  • f29e3d7d34 Local gptq support. Nicolas Patry 2023-07-31 09:51:58 +0200
  • a1cec743ee chore: fix typo in mpt_modeling.py Ikko Eltociear Ashimine 2023-07-31 11:52:53 +0900
  • bdc76134a3 LICENSE change michaelfeil 2023-07-30 12:22:49 +0200
  • 5a465fa40a Fix typing in Model.generate_token Jae-Won Chung 2023-07-28 17:23:41 -0400
  • 3ef5ffbc64 v1.0.0 (#727) v1.0.0 OlivierDehaene 2023-07-28 17:43:46 +0200
  • 51203e4087 revert vllm change OlivierDehaene 2023-07-28 16:51:33 +0200
  • 95d0fba7de Return more top-n-tokens when probabilities are equal Vincent Brouwers 2023-07-28 14:21:11 +0000
  • 19dc7d31b9 update README OlivierDehaene 2023-07-28 16:26:04 +0200
  • eca8817183 udpate README OlivierDehaene 2023-07-28 16:21:59 +0200
  • 23446d15db v1.0.0 OlivierDehaene 2023-07-28 16:14:30 +0200
  • bde25e62b3 chore: update license to HFOIL (#725) OlivierDehaene 2023-07-28 15:59:46 +0200
  • ce90be833b chore: update license to HFOIL OlivierDehaene 2023-07-28 15:56:36 +0200
  • afd04dc71e feat(server): update vllm version (#723) OlivierDehaene 2023-07-28 15:36:38 +0200
  • dc59fd90ff feat(server): update vllm version OlivierDehaene 2023-07-28 14:47:46 +0200
  • f848decee6 docs: Add hardware section to TOC in README (#721) regisss 2023-07-28 11:20:03 +0200
  • 8e47e17ada Add section to TOC in README regisss 2023-07-28 10:27:05 +0200
  • 5a1cccbb98 Add section about TGI on other AI hardware accelerators in README (#715) regisss 2023-07-28 09:14:03 +0200
  • 987b0fff3a Load quantize_config.json from local path Antoni Baum 2023-07-27 18:03:04 -0700
  • eba543222b Add section about TGI on Gaudi in README regisss 2023-07-27 22:40:53 +0200
  • 9f18f4c006 v0.9.4 (#713) v0.9.4 OlivierDehaene 2023-07-27 19:25:15 +0200
  • e366dfa0f0 v0.9.4 OlivierDehaene 2023-07-27 18:49:03 +0200
  • ab96b9aec3 feat(server): support new falcon config (#712) OlivierDehaene 2023-07-27 18:38:57 +0200
  • e8b0a014a0 feat(server): support new falcon config OlivierDehaene 2023-07-27 17:50:42 +0200
  • 2efd46ef95 fix(server): fix missing datasets in quantize OlivierDehaene 2023-07-27 14:50:45 +0200
  • 8bd0adb135 fix(server): fix quantization python requirements (#708) OlivierDehaene 2023-07-27 12:28:10 +0200
  • 0754eaaf17 fix(server): fix quantization python requirements OlivierDehaene 2023-07-27 12:03:47 +0200
  • 50d05fa20d Implement top-n-tokens for all models Vincent Brouwers 2023-07-26 15:12:57 +0000
  • 494e6b1c61 Share computation for top-n-token decoding Vincent Brouwers 2023-07-25 14:55:32 +0000
  • f809f179dc Add batched top-n-tokens to FlashCausalLM Vincent Brouwers 2023-07-25 14:17:25 +0000
  • a7be416c87 Add top-n-tokens support to benchmark Vincent Brouwers 2023-07-24 14:02:56 +0000
  • 7c014c7dfe Add WIP support for returning top tokens Vincent Brouwers 2023-07-14 19:48:15 +0000
  • d8a955740f Remove extra workflows orangetin 2023-07-26 20:01:36 -0700
  • 3f031ad51f Fix workflow orangetin 2023-07-26 19:58:44 -0700
  • 369ad020f4 Ignore flash-attention Yang, Bo 2023-07-26 11:37:01 -0700
  • 9dc53886c3 Ignore external projects Yang, Bo 2023-07-25 11:46:57 -0700
  • e64a65891b docs(README): update readme OlivierDehaene 2023-07-25 19:45:25 +0200
  • a0d55358d2 feat(server): Using quantize_config.json instead of GPTQ_BITS env variables. (#671) Nicolas Patry 2023-07-25 12:00:27 +0100
  • 0635d0e245 After rebase. Nicolas Patry 2023-07-25 09:14:47 +0200
  • 95583ee257 Small fix. Nicolas Patry 2023-07-21 10:20:01 +0000
  • c07ee68b60 feat(server): Using quantize_config.json instead of GPTQ_BITS env variables. Nicolas Patry 2023-07-21 10:12:28 +0000
  • 79b4620107 adding suggested changes for os.environ vars and reporting correct torch.dtype on API michaelfeil 2023-07-25 09:05:08 +0200
  • e38cda5b9b apply suggested changes michaelfeil 2023-07-25 08:32:55 +0200
  • 9bb64c92a9 Add AutoCausalLM Yang, Bo 2023-07-12 01:07:10 +0000
  • be6c9acf46 cpu speedup kwargs michaelfeil 2023-07-24 23:13:32 +0200
  • 37df6df38e fix(server): fix exllama buffers (#689) OlivierDehaene 2023-07-24 14:25:43 +0200
  • 74c87f5888 fmt OlivierDehaene 2023-07-24 13:59:10 +0200
  • a6057c4076 fix(server): fix exllama buffers OlivierDehaene 2023-07-24 10:41:24 +0200
  • 73a4d65d26 feat: add cuda memory fraction (#659) OlivierDehaene 2023-07-24 11:43:58 +0200
  • 336ea37637 fix other issue and make code pass on cpu. michaelfeil 2023-07-24 11:03:02 +0200
  • 31f45f6351 memory fraction on free memory OlivierDehaene 2023-07-24 10:25:05 +0200
  • 1b59f8da73 feat: add cuda memory fraction OlivierDehaene 2023-07-20 11:29:48 +0200
  • b2575fd18d adapt trust remote code michaelfeil 2023-07-23 21:18:00 +0200
  • 9b382f9f4a reformat code and imports michaelfeil 2023-07-23 12:20:07 +0200
  • ccc7b7ab8f Cleanup Antoni Baum 2023-07-22 17:12:37 -0700
  • d583f962f8 WIP Antoni Baum 2023-07-22 16:03:07 -0700
  • 74c31ee890 improve error handling michaelfeil 2023-07-22 23:50:38 +0200
  • 7338e0097f add requirements to docker michaelfeil 2023-07-22 23:42:30 +0200
  • 3f2fce87e7 reformatted code michaelfeil 2023-07-22 21:54:31 +0200
  • 2da14fcb2a initial commit for running ctranslate2 michaelfeil 2023-07-22 21:34:48 +0200
  • 1da642bd0e feat(server): add local prom and health routes if running w/ ngrok OlivierDehaene 2023-07-21 16:56:30 +0200
  • 15b3e9ffb0 Directly load GPTBigCode to specified device (#618) Yang, Bo 2023-07-21 02:27:31 -0700
  • d5b5bc750f feat(server): Add exllama GPTQ CUDA kernel support #553 (#666) Nicolas Patry 2023-07-21 10:59:00 +0200
  • afb39404e1 Getting closer to the non gptq test (stop sequence doesn't work). Nicolas Patry 2023-07-21 08:15:25 +0000
  • 8b6a262539 Switching model for integration test llama gptq. Nicolas Patry 2023-07-21 07:29:32 +0000
  • 1dc952a674 Wtf gh. Nicolas Patry 2023-07-21 06:26:46 +0000
  • 40be532841 Update starcoder_gptq Nicolas Patry 2023-07-21 06:00:02 +0000
  • 3ec3adde2f Separate build process. Nicolas Patry 2023-07-20 22:09:31 +0000
  • c6e702fb2f Add kernel target. Nicolas Patry 2023-07-20 20:24:44 +0000
  • 12191b7e42 Fix config. Nicolas Patry 2023-07-20 19:56:31 +0000