Remove torch from the requirements.txt files and re-format

This commit is contained in:
OlivierDehaene 2024-02-16 11:24:46 +01:00
parent 14b5ff0b82
commit af23c432e8
10 changed files with 67 additions and 143 deletions

View File

@ -39,7 +39,7 @@ RUN cargo build --release
# Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 as pytorch-install
ARG PYTORCH_VERSION=2.1.1
ARG PYTORCH_VERSION=2.2.0
ARG PYTHON_VERSION=3.10
# Keep in sync with `server/pyproject.toml`
ARG CUDA_VERSION=12.1
@ -225,7 +225,7 @@ COPY server/Makefile server/Makefile
RUN cd server && \
make gen-server && \
pip install -r requirements_cuda.txt && \
pip install ".[bnb, accelerate, quantize, peft]" --no-cache-dir
pip install ".[bnb, accelerate, quantize, peft, outlines]" --no-cache-dir
# Install benchmarker
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark

View File

@ -150,7 +150,7 @@ COPY server/Makefile server/Makefile
RUN cd server && \
make gen-server && \
pip install -r requirements_rocm.txt && \
pip install ".[accelerate, peft]" --no-cache-dir
pip install ".[accelerate, peft, outlines]" --no-cache-dir
# Install benchmarker
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark

View File

@ -23,7 +23,7 @@ install-megablocks:
install: gen-server
pip install pip --upgrade
pip install -r requirements_cuda.txt
pip install -e ".[bnb, accelerate, quantize, peft]"
pip install -e ".[bnb, accelerate, quantize, peft, outlines]"
run-dev:
SAFETENSORS_FAST_GPU=1 python -m torch.distributed.run --nproc_per_node=2 text_generation_server/cli.py serve bigscience/bloom-560m --sharded

75
server/poetry.lock generated
View File

@ -144,7 +144,7 @@ frozenlist = ">=1.1.0"
name = "annotated-types"
version = "0.6.0"
description = "Reusable constraint types to use with typing.Annotated"
optional = false
optional = true
python-versions = ">=3.8"
files = [
{file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"},
@ -166,7 +166,7 @@ files = [
name = "attrs"
version = "23.2.0"
description = "Classes Without Boilerplate"
optional = false
optional = true
python-versions = ">=3.7"
files = [
{file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"},
@ -331,7 +331,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""}
name = "cloudpickle"
version = "3.0.0"
description = "Pickler class to extend the standard pickle.Pickler functionality"
optional = false
optional = true
python-versions = ">=3.8"
files = [
{file = "cloudpickle-3.0.0-py3-none-any.whl", hash = "sha256:246ee7d0c295602a036e86369c77fecda4ab17b506496730f2f576d9016fd9c7"},
@ -429,7 +429,7 @@ profile = ["gprof2dot (>=2022.7.29)"]
name = "diskcache"
version = "5.6.3"
description = "Disk Cache -- Disk and file backed persistent cache."
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19"},
@ -952,7 +952,7 @@ files = [
name = "interegular"
version = "0.3.3"
description = "a regex intersection checker"
optional = false
optional = true
python-versions = ">=3.7"
files = [
{file = "interegular-0.3.3-py37-none-any.whl", hash = "sha256:b0c07007d48c89d6d19f7204972d369b2a77222722e126b6aa63aa721dc3b19c"},
@ -963,7 +963,7 @@ files = [
name = "jinja2"
version = "3.1.3"
description = "A very fast and expressive template engine."
optional = false
optional = true
python-versions = ">=3.7"
files = [
{file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"},
@ -980,7 +980,7 @@ i18n = ["Babel (>=2.7)"]
name = "joblib"
version = "1.3.2"
description = "Lightweight pipelining with Python functions"
optional = false
optional = true
python-versions = ">=3.7"
files = [
{file = "joblib-1.3.2-py3-none-any.whl", hash = "sha256:ef4331c65f239985f3f2220ecc87db222f08fd22097a3dd5698f693875f8cbb9"},
@ -991,7 +991,7 @@ files = [
name = "jsonschema"
version = "4.21.1"
description = "An implementation of JSON Schema validation for Python"
optional = false
optional = true
python-versions = ">=3.8"
files = [
{file = "jsonschema-4.21.1-py3-none-any.whl", hash = "sha256:7996507afae316306f9e2290407761157c6f78002dcf7419acb99822143d1c6f"},
@ -1012,7 +1012,7 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-
name = "jsonschema-specifications"
version = "2023.12.1"
description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry"
optional = false
optional = true
python-versions = ">=3.8"
files = [
{file = "jsonschema_specifications-2023.12.1-py3-none-any.whl", hash = "sha256:87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c"},
@ -1026,7 +1026,7 @@ referencing = ">=0.31.0"
name = "lark"
version = "1.1.9"
description = "a modern parsing library"
optional = false
optional = true
python-versions = ">=3.6"
files = [
{file = "lark-1.1.9-py3-none-any.whl", hash = "sha256:a0dd3a87289f8ccbb325901e4222e723e7d745dbfc1803eaf5f3d2ace19cf2db"},
@ -1043,7 +1043,7 @@ regex = ["regex"]
name = "llvmlite"
version = "0.42.0"
description = "lightweight wrapper around basic LLVM functionality"
optional = false
optional = true
python-versions = ">=3.9"
files = [
{file = "llvmlite-0.42.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3366938e1bf63d26c34fbfb4c8e8d2ded57d11e0567d5bb243d89aab1eb56098"},
@ -1091,7 +1091,7 @@ dev = ["Sphinx (>=4.1.1)", "black (>=19.10b0)", "colorama (>=0.3.4)", "docutils
name = "markupsafe"
version = "2.1.5"
description = "Safely add untrusted strings to HTML/XML markup."
optional = false
optional = true
python-versions = ">=3.7"
files = [
{file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"},
@ -1160,7 +1160,7 @@ files = [
name = "mpmath"
version = "1.3.0"
description = "Python library for arbitrary-precision floating-point arithmetic"
optional = false
optional = true
python-versions = "*"
files = [
{file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"},
@ -1300,7 +1300,7 @@ dill = ">=0.3.8"
name = "nest-asyncio"
version = "1.6.0"
description = "Patch asyncio to allow nested event loops"
optional = false
optional = true
python-versions = ">=3.5"
files = [
{file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"},
@ -1311,7 +1311,7 @@ files = [
name = "networkx"
version = "3.2.1"
description = "Python package for creating and manipulating graphs and networks"
optional = false
optional = true
python-versions = ">=3.9"
files = [
{file = "networkx-3.2.1-py3-none-any.whl", hash = "sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2"},
@ -1329,7 +1329,7 @@ test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"]
name = "numba"
version = "0.59.0"
description = "compiling Python code using LLVM"
optional = false
optional = true
python-versions = ">=3.9"
files = [
{file = "numba-0.59.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8d061d800473fb8fef76a455221f4ad649a53f5e0f96e3f6c8b8553ee6fa98fa"},
@ -1408,7 +1408,7 @@ files = [
name = "nvidia-cublas-cu12"
version = "12.1.3.1"
description = "CUBLAS native runtime libraries"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728"},
@ -1419,7 +1419,7 @@ files = [
name = "nvidia-cuda-cupti-cu12"
version = "12.1.105"
description = "CUDA profiling tools runtime libs."
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e"},
@ -1430,7 +1430,7 @@ files = [
name = "nvidia-cuda-nvrtc-cu12"
version = "12.1.105"
description = "NVRTC native runtime libraries"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2"},
@ -1441,7 +1441,7 @@ files = [
name = "nvidia-cuda-runtime-cu12"
version = "12.1.105"
description = "CUDA Runtime native Libraries"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40"},
@ -1452,7 +1452,7 @@ files = [
name = "nvidia-cudnn-cu12"
version = "8.9.2.26"
description = "cuDNN runtime libraries"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl", hash = "sha256:5ccb288774fdfb07a7e7025ffec286971c06d8d7b4fb162525334616d7629ff9"},
@ -1465,7 +1465,7 @@ nvidia-cublas-cu12 = "*"
name = "nvidia-cufft-cu12"
version = "11.0.2.54"
description = "CUFFT native runtime libraries"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl", hash = "sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56"},
@ -1476,7 +1476,7 @@ files = [
name = "nvidia-curand-cu12"
version = "10.3.2.106"
description = "CURAND native runtime libraries"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0"},
@ -1487,7 +1487,7 @@ files = [
name = "nvidia-cusolver-cu12"
version = "11.4.5.107"
description = "CUDA solver native runtime libraries"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd"},
@ -1503,7 +1503,7 @@ nvidia-nvjitlink-cu12 = "*"
name = "nvidia-cusparse-cu12"
version = "12.1.0.106"
description = "CUSPARSE native runtime libraries"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c"},
@ -1517,7 +1517,7 @@ nvidia-nvjitlink-cu12 = "*"
name = "nvidia-nccl-cu12"
version = "2.19.3"
description = "NVIDIA Collective Communication Library (NCCL) Runtime"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl", hash = "sha256:a9734707a2c96443331c1e48c717024aa6678a0e2a4cb66b2c364d18cee6b48d"},
@ -1527,7 +1527,7 @@ files = [
name = "nvidia-nvjitlink-cu12"
version = "12.3.101"
description = "Nvidia JIT LTO Library"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-manylinux1_x86_64.whl", hash = "sha256:64335a8088e2b9d196ae8665430bc6a2b7e6ef2eb877a9c735c804bd4ff6467c"},
@ -1538,7 +1538,7 @@ files = [
name = "nvidia-nvtx-cu12"
version = "12.1.105"
description = "NVIDIA Tools Extension"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5"},
@ -1703,7 +1703,7 @@ files = [
name = "outlines"
version = "0.0.27"
description = "Probabilistic Generative Model Programming"
optional = false
optional = true
python-versions = ">=3.8"
files = [
{file = "outlines-0.0.27-py3-none-any.whl", hash = "sha256:dd614f49760ff8870a5d491fad4a372d7b7d4da5c1646f1b42f12a9d5e34db4b"},
@ -2055,7 +2055,7 @@ files = [
name = "pydantic"
version = "2.6.1"
description = "Data validation using Python type hints"
optional = false
optional = true
python-versions = ">=3.8"
files = [
{file = "pydantic-2.6.1-py3-none-any.whl", hash = "sha256:0b6a909df3192245cb736509a92ff69e4fef76116feffec68e93a567347bae6f"},
@ -2074,7 +2074,7 @@ email = ["email-validator (>=2.0.0)"]
name = "pydantic-core"
version = "2.16.2"
description = ""
optional = false
optional = true
python-versions = ">=3.8"
files = [
{file = "pydantic_core-2.16.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3fab4e75b8c525a4776e7630b9ee48aea50107fea6ca9f593c98da3f4d11bf7c"},
@ -2271,7 +2271,7 @@ files = [
name = "referencing"
version = "0.33.0"
description = "JSON Referencing + Python"
optional = false
optional = true
python-versions = ">=3.8"
files = [
{file = "referencing-0.33.0-py3-none-any.whl", hash = "sha256:39240f2ecc770258f28b642dd47fd74bc8b02484de54e1882b74b35ebd779bd5"},
@ -2409,7 +2409,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
name = "rpds-py"
version = "0.18.0"
description = "Python bindings to Rust's persistent data structures (rpds)"
optional = false
optional = true
python-versions = ">=3.8"
files = [
{file = "rpds_py-0.18.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:5b4e7d8d6c9b2e8ee2d55c90b59c707ca59bc30058269b3db7b1f8df5763557e"},
@ -2719,7 +2719,7 @@ files = [
name = "sympy"
version = "1.12"
description = "Computer algebra system (CAS) in Python"
optional = false
optional = true
python-versions = ">=3.8"
files = [
{file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"},
@ -2882,7 +2882,7 @@ files = [
name = "torch"
version = "2.2.0"
description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
optional = false
optional = true
python-versions = ">=3.8.0"
files = [
{file = "torch-2.2.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:d366158d6503a3447e67f8c0ad1328d54e6c181d88572d688a625fac61b13a97"},
@ -3028,7 +3028,7 @@ vision = ["Pillow (>=10.0.1,<=15.0)"]
name = "triton"
version = "2.2.0"
description = "A language and compiler for custom Deep Learning operations"
optional = false
optional = true
python-versions = "*"
files = [
{file = "triton-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2294514340cfe4e8f4f9e5c66c702744c4a117d25e618bd08469d0bfed1e2e5"},
@ -3422,6 +3422,7 @@ multidict = ">=4.0"
[extras]
accelerate = ["accelerate"]
bnb = ["bitsandbytes"]
outlines = ["outlines"]
peft = ["peft"]
quantize = ["accelerate", "datasets", "texttable"]
torch = ["torch"]
@ -3429,4 +3430,4 @@ torch = ["torch"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.9,<3.13"
content-hash = "c2990763ec38a0249cbb2140f0fb87d2ff4633c4810e4ec6f979e821ecb5442e"
content-hash = "47696ea72017636437a1bb73aa6463f064cae02620cbf93027fad8f2ebecd014"

View File

@ -34,7 +34,7 @@ peft = { version = "^0.8.2", optional = true }
torch = { version = "^2.1.1", optional = true }
scipy = "^1.11.1"
pillow = "^10.0.0"
outlines="^0.0.27"
outlines= { version = "^0.0.27", optional = true }
[tool.poetry.extras]
torch = ["torch"]
@ -42,6 +42,7 @@ accelerate = ["accelerate"]
bnb = ["bitsandbytes"]
peft = ["peft"]
quantize = ["texttable", "datasets", "accelerate"]
outlines = ["outlines"]
[tool.poetry.group.dev.dependencies]
grpcio-tools = "^1.51.1"

View File

@ -1,14 +1,10 @@
annotated-types==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
attrs==23.2.0 ; python_version >= "3.9" and python_version < "3.13"
backoff==2.2.1 ; python_version >= "3.9" and python_version < "3.13"
bitsandbytes==0.41.3.post2 ; python_version >= "3.9" and python_version < "3.13"
certifi==2024.2.2 ; python_version >= "3.9" and python_version < "3.13"
charset-normalizer==3.3.2 ; python_version >= "3.9" and python_version < "3.13"
click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
cloudpickle==3.0.0 ; python_version >= "3.9" and python_version < "3.13"
colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
diskcache==5.6.3 ; python_version >= "3.9" and python_version < "3.13"
einops==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
filelock==3.13.1 ; python_version >= "3.9" and python_version < "3.13"
fsspec==2023.10.0 ; python_version >= "3.9" and python_version < "3.13"
@ -20,32 +16,8 @@ grpcio==1.60.1 ; python_version >= "3.9" and python_version < "3.13"
hf-transfer==0.1.5 ; python_version >= "3.9" and python_version < "3.13"
huggingface-hub==0.19.4 ; python_version >= "3.9" and python_version < "3.13"
idna==3.6 ; python_version >= "3.9" and python_version < "3.13"
interegular==0.3.3 ; python_version >= "3.9" and python_version < "3.13"
jinja2==3.1.3 ; python_version >= "3.9" and python_version < "3.13"
joblib==1.3.2 ; python_version >= "3.9" and python_version < "3.13"
jsonschema-specifications==2023.12.1 ; python_version >= "3.9" and python_version < "3.13"
jsonschema==4.21.1 ; python_version >= "3.9" and python_version < "3.13"
lark==1.1.9 ; python_version >= "3.9" and python_version < "3.13"
llvmlite==0.42.0 ; python_version >= "3.9" and python_version < "3.13"
loguru==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
markupsafe==2.1.5 ; python_version >= "3.9" and python_version < "3.13"
mpmath==1.3.0 ; python_version >= "3.9" and python_version < "3.13"
nest-asyncio==1.6.0 ; python_version >= "3.9" and python_version < "3.13"
networkx==3.2.1 ; python_version >= "3.9" and python_version < "3.13"
numba==0.59.0 ; python_version >= "3.9" and python_version < "3.13"
numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.13"
nvidia-cublas-cu12==12.1.3.1 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-cuda-cupti-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-cuda-nvrtc-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-cuda-runtime-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-cudnn-cu12==8.9.2.26 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-cufft-cu12==11.0.2.54 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-curand-cu12==10.3.2.106 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-cusolver-cu12==11.4.5.107 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-cusparse-cu12==12.1.0.106 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-nccl-cu12==2.19.3 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-nvjitlink-cu12==12.3.101 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-nvtx-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
opentelemetry-api==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
opentelemetry-exporter-otlp-proto-grpc==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
opentelemetry-exporter-otlp-proto-http==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
@ -55,27 +27,19 @@ opentelemetry-instrumentation==0.36b0 ; python_version >= "3.9" and python_versi
opentelemetry-proto==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
opentelemetry-sdk==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
opentelemetry-semantic-conventions==0.36b0 ; python_version >= "3.9" and python_version < "3.13"
outlines==0.0.27 ; python_version >= "3.9" and python_version < "3.13"
packaging==23.2 ; python_version >= "3.9" and python_version < "3.13"
pillow==10.2.0 ; python_version >= "3.9" and python_version < "3.13"
protobuf==4.25.3 ; python_version >= "3.9" and python_version < "3.13"
pydantic-core==2.16.2 ; python_version >= "3.9" and python_version < "3.13"
pydantic==2.6.1 ; python_version >= "3.9" and python_version < "3.13"
pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
referencing==0.33.0 ; python_version >= "3.9" and python_version < "3.13"
regex==2023.12.25 ; python_version >= "3.9" and python_version < "3.13"
requests==2.31.0 ; python_version >= "3.9" and python_version < "3.13"
rpds-py==0.18.0 ; python_version >= "3.9" and python_version < "3.13"
safetensors==0.3.3 ; python_version >= "3.9" and python_version < "3.13"
scipy==1.12.0 ; python_version >= "3.9" and python_version < "3.13"
sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
setuptools==69.1.0 ; python_version >= "3.9" and python_version < "3.13"
sympy==1.12 ; python_version >= "3.9" and python_version < "3.13"
tokenizers==0.15.2 ; python_version >= "3.9" and python_version < "3.13"
torch==2.2.0 ; python_version >= "3.9" and python_version < "3.13"
tqdm==4.66.2 ; python_version >= "3.9" and python_version < "3.13"
transformers==4.37.1 ; python_version >= "3.9" and python_version < "3.13"
triton==2.2.0 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
typing-extensions==4.9.0 ; python_version >= "3.9" and python_version < "3.13"
urllib3==2.2.0 ; python_version >= "3.9" and python_version < "3.13"

View File

@ -1,13 +1,9 @@
annotated-types==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
attrs==23.2.0 ; python_version >= "3.9" and python_version < "3.13"
backoff==2.2.1 ; python_version >= "3.9" and python_version < "3.13"
certifi==2024.2.2 ; python_version >= "3.9" and python_version < "3.13"
charset-normalizer==3.3.2 ; python_version >= "3.9" and python_version < "3.13"
click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
cloudpickle==3.0.0 ; python_version >= "3.9" and python_version < "3.13"
colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
diskcache==5.6.3 ; python_version >= "3.9" and python_version < "3.13"
einops==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
filelock==3.13.1 ; python_version >= "3.9" and python_version < "3.13"
fsspec==2023.10.0 ; python_version >= "3.9" and python_version < "3.13"
@ -19,32 +15,8 @@ grpcio==1.60.1 ; python_version >= "3.9" and python_version < "3.13"
hf-transfer==0.1.5 ; python_version >= "3.9" and python_version < "3.13"
huggingface-hub==0.19.4 ; python_version >= "3.9" and python_version < "3.13"
idna==3.6 ; python_version >= "3.9" and python_version < "3.13"
interegular==0.3.3 ; python_version >= "3.9" and python_version < "3.13"
jinja2==3.1.3 ; python_version >= "3.9" and python_version < "3.13"
joblib==1.3.2 ; python_version >= "3.9" and python_version < "3.13"
jsonschema-specifications==2023.12.1 ; python_version >= "3.9" and python_version < "3.13"
jsonschema==4.21.1 ; python_version >= "3.9" and python_version < "3.13"
lark==1.1.9 ; python_version >= "3.9" and python_version < "3.13"
llvmlite==0.42.0 ; python_version >= "3.9" and python_version < "3.13"
loguru==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
markupsafe==2.1.5 ; python_version >= "3.9" and python_version < "3.13"
mpmath==1.3.0 ; python_version >= "3.9" and python_version < "3.13"
nest-asyncio==1.6.0 ; python_version >= "3.9" and python_version < "3.13"
networkx==3.2.1 ; python_version >= "3.9" and python_version < "3.13"
numba==0.59.0 ; python_version >= "3.9" and python_version < "3.13"
numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.13"
nvidia-cublas-cu12==12.1.3.1 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-cuda-cupti-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-cuda-nvrtc-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-cuda-runtime-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-cudnn-cu12==8.9.2.26 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-cufft-cu12==11.0.2.54 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-curand-cu12==10.3.2.106 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-cusolver-cu12==11.4.5.107 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-cusparse-cu12==12.1.0.106 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-nccl-cu12==2.19.3 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-nvjitlink-cu12==12.3.101 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
nvidia-nvtx-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
opentelemetry-api==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
opentelemetry-exporter-otlp-proto-grpc==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
opentelemetry-exporter-otlp-proto-http==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
@ -54,27 +26,19 @@ opentelemetry-instrumentation==0.36b0 ; python_version >= "3.9" and python_versi
opentelemetry-proto==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
opentelemetry-sdk==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
opentelemetry-semantic-conventions==0.36b0 ; python_version >= "3.9" and python_version < "3.13"
outlines==0.0.27 ; python_version >= "3.9" and python_version < "3.13"
packaging==23.2 ; python_version >= "3.9" and python_version < "3.13"
pillow==10.2.0 ; python_version >= "3.9" and python_version < "3.13"
protobuf==4.25.3 ; python_version >= "3.9" and python_version < "3.13"
pydantic-core==2.16.2 ; python_version >= "3.9" and python_version < "3.13"
pydantic==2.6.1 ; python_version >= "3.9" and python_version < "3.13"
pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
referencing==0.33.0 ; python_version >= "3.9" and python_version < "3.13"
regex==2023.12.25 ; python_version >= "3.9" and python_version < "3.13"
requests==2.31.0 ; python_version >= "3.9" and python_version < "3.13"
rpds-py==0.18.0 ; python_version >= "3.9" and python_version < "3.13"
safetensors==0.3.3 ; python_version >= "3.9" and python_version < "3.13"
scipy==1.12.0 ; python_version >= "3.9" and python_version < "3.13"
sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
setuptools==69.1.0 ; python_version >= "3.9" and python_version < "3.13"
sympy==1.12 ; python_version >= "3.9" and python_version < "3.13"
tokenizers==0.15.2 ; python_version >= "3.9" and python_version < "3.13"
torch==2.2.0 ; python_version >= "3.9" and python_version < "3.13"
tqdm==4.66.2 ; python_version >= "3.9" and python_version < "3.13"
transformers==4.37.1 ; python_version >= "3.9" and python_version < "3.13"
triton==2.2.0 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13"
typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
typing-extensions==4.9.0 ; python_version >= "3.9" and python_version < "3.13"
urllib3==2.2.0 ; python_version >= "3.9" and python_version < "3.13"

View File

@ -415,14 +415,14 @@ class CausalLMBatch(Batch):
# We slice the keys to remove the padding from previous batches
past_seq_len = batch.max_input_length - 1
if batch.keys_head_dim_last:
padded_past_keys[start_index:end_index, :, -past_seq_len:, :] = (
past_keys[:, :, -past_seq_len:, :]
)
padded_past_keys[
start_index:end_index, :, -past_seq_len:, :
] = past_keys[:, :, -past_seq_len:, :]
else:
# BLOOM case
padded_past_keys[start_index:end_index, :, :, -past_seq_len:] = (
past_keys[:, :, :, -past_seq_len:]
)
padded_past_keys[
start_index:end_index, :, :, -past_seq_len:
] = past_keys[:, :, :, -past_seq_len:]
del past_keys
start_index = end_index
@ -440,9 +440,9 @@ class CausalLMBatch(Batch):
end_index = start_index + len(batch)
# We slice the past values to remove the padding from previous batches
past_seq_len = batch.max_input_length - 1
padded_past_values[start_index:end_index, :, -past_seq_len:, :] = (
past_values[:, :, -past_seq_len:, :]
)
padded_past_values[
start_index:end_index, :, -past_seq_len:, :
] = past_values[:, :, -past_seq_len:, :]
del past_values
# Update values

View File

@ -1017,9 +1017,9 @@ class FlashCausalLM(Model):
# Copy batch.input_ids to prefill_tokens_indices
if prefill_logprobs:
if len(batch) > 1:
prefill_tokens_indices[out_start_index : out_end_index - 1] = (
batch.input_ids[start_index + 1 : start_index + out_length]
)
prefill_tokens_indices[
out_start_index : out_end_index - 1
] = batch.input_ids[start_index + 1 : start_index + out_length]
else:
# Set prefill_tokens_indices to the correct slice
prefill_tokens_indices = batch.input_ids[

View File

@ -1,10 +1,8 @@
import math
import torch
import json
from loguru import logger
from functools import lru_cache
from typing import Optional, List, Dict, Union
from typing import Dict, Union
from text_generation_server.pb.generate_pb2 import GrammarType
from outlines.fsm.fsm import RegexFSM
@ -492,7 +490,7 @@ class GrammarLogitProcessor(LogitsProcessor):
if fsm_grammar_state == -1 or self.fsm is None:
return logits
allowed_tokens = self.fsm.allowed_token_ids(fsm_grammar_state)
mask = torch.full((logits.shape[-1],), -math.inf, device=self.device)
mask = torch.full_like(logits, -math.inf)
mask[allowed_tokens] = 0
biased_scores = logits + mask
return biased_scores
@ -550,22 +548,15 @@ class GrammarLogitProcessor(LogitsProcessor):
logger.debug(f"Adapted tokenizer in {time.time() - start_time:.2f}s")
return tokenizer
def filter(self, indices):
new_fsms = []
for i in indices:
new_fsms.append(self.fsms[i])
self.fsms = new_fsms
return self
class HeterogeneousGrammarLogitProcessor(LogitsProcessor):
def __init__(self, tokenizer, device, grammars, grammar_type):
def __init__(self, tokenizer, device, grammars, grammar_types):
self.device = device
self.tokenizer = GrammarLogitProcessor._cached_adapt_tokenizer(tokenizer)
self.fsms = []
for i in range(len(grammars)):
for grammar, grammar_type in zip(grammars, grammar_types):
fsm = GrammarLogitProcessor._cached_compile_fsm(
grammar_type[i], grammars[i], self.tokenizer
grammar_type, grammar, self.tokenizer
)
self.fsms.append(fsm)
@ -573,7 +564,6 @@ class HeterogeneousGrammarLogitProcessor(LogitsProcessor):
self,
logits: torch.Tensor,
fsm_grammar_states: List[int],
mask: torch.Tensor,
):
mask = torch.full_like(logits, -math.inf)
for i in range(logits.shape[0]):
@ -585,7 +575,7 @@ class HeterogeneousGrammarLogitProcessor(LogitsProcessor):
logits += mask
return logits
def advance_batch(self, next_token_ids, fsm_grammar_states, grammars):
def advance_batch(self, next_token_ids, fsm_grammar_states):
return [
GrammarLogitProcessor._advance(
next_token_ids[i], fsm_grammar_states[i], self.fsms[i]
@ -599,4 +589,8 @@ class HeterogeneousGrammarLogitProcessor(LogitsProcessor):
)
def filter(self, indices):
return GrammarLogitProcessor.filter(self, indices)
new_fsms = []
for i in indices:
new_fsms.append(self.fsms[i])
self.fsms = new_fsms
return self