diff --git a/README.md b/README.md
index 2635f641..67940ae8 100644
--- a/README.md
+++ b/README.md
@@ -26,6 +26,7 @@ to power Bloom, BloomZ and MT0-XXL api-inference widgets.
 - [MT0-XXL](https://huggingface.co/bigscience/mt0-xxl)
 - ~~[Galactica](https://huggingface.co/facebook/galactica-120b)~~ (deactivated)
 - [SantaCoder](https://huggingface.co/bigcode/santacoder)
+- [GPT-NeoX 20B](https://huggingface.co/EleutherAI/gpt-neox-20b): use `--revision refs/pr/13`
 
 Other models are supported on a best effort basis using:
 
diff --git a/server/text_generation/utils.py b/server/text_generation/utils.py
index 241973bf..62d60635 100644
--- a/server/text_generation/utils.py
+++ b/server/text_generation/utils.py
@@ -198,11 +198,10 @@ def try_to_load_from_cache(model_name, revision, filename):
 
     # Resolve refs (for instance to convert main to the associated commit sha)
     if refs_dir.is_dir():
-        for p in refs_dir.rglob("*"):
-            if str(p).endswith(revision):
-                with (refs_dir / revision).open() as f:
-                    revision = f.read()
-                break
+        revision_file = refs_dir / revision
+        if revision_file.exists():
+            with revision_file.open() as f:
+                revision = f.read()
 
     # Check if file is cached as "no_exist"
     if (no_exist_dir / revision / filename).is_file():