Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-09-11 20:34:54 +00:00
Let's try non sharded gemma.

commit 1e03ea96d0, parent 15e178e3ad
.github/workflows/build.yaml (vendored): 2 changes
@@ -78,7 +78,7 @@ jobs:
 # export runs_on="ubuntu-latest"
 export runs_on="aws-highmemory-32-plus-priv"
 export platform="cpu"
-export extra_pytest="-k test_flash_llama_simple"
+export extra_pytest="-k test_flash_gemma_simple"
 ;;
 esac
 echo $dockerfile
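The `extra_pytest` value is presumably appended to the pytest invocation in a later CI step, so the `-k` expression controls which integration test runs. A minimal local equivalent, assuming the tests live under `integration-tests/` and the filter is passed through verbatim (both are assumptions, not shown in this diff):

```python
# Minimal sketch of the equivalent local invocation, NOT the CI step itself.
# Assumptions: the integration tests live under integration-tests/ and
# $extra_pytest is forwarded to pytest unchanged.
import sys

import pytest

if __name__ == "__main__":
    # Same effect as extra_pytest="-k test_flash_gemma_simple": -k keeps only
    # the tests whose names match the expression.
    sys.exit(pytest.main(["integration-tests", "-k", "test_flash_gemma_simple"]))
```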
@@ -16,7 +16,7 @@ async def flash_gemma(flash_gemma_handle):
 @pytest.mark.release
 @pytest.mark.asyncio
 @pytest.mark.private
-async def test_flash_gemma(flash_gemma, response_snapshot):
+async def test_flash_gemma_simple(flash_gemma, response_snapshot):
     response = await flash_gemma.generate(
         "Test request", max_new_tokens=10, decoder_input_details=True
     )
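Renaming the test to `test_flash_gemma_simple` keeps it aligned with the new `-k` filter in the workflow. A standalone sketch (hypothetical demo file, not repository code) showing how `-k` selection behaves with the old and new names:

```python
# Hypothetical demo file (e.g. test_selection_demo.py), not part of the repo.
# Running `pytest test_selection_demo.py -k test_flash_gemma_simple -v`
# collects only the first test; the llama test is deselected.
def test_flash_gemma_simple():
    # Matches the new CI filter "-k test_flash_gemma_simple".
    assert True


def test_flash_llama_simple():
    # Matched the old filter; no longer selected after the workflow change.
    assert True
```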