feat: tokenize each request individually and increase warmup image size

This commit is contained in:
drbh 2024-12-05 10:58:37 -05:00
parent 5f78ec32a5
commit 1bcfba305b
6 changed files with 42 additions and 11 deletions

View File

@ -86,6 +86,6 @@ impl ChunksToString for Vec<InputChunk> {
} }
} }
static WARMUP_IMAGE_BASE64 :&str = "iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAIAAAAC64paAAABg2lDQ1BJQ0MgcHJvZmlsZQAAKJF9kT1Iw0AcxV/TSotUROxQxCFDdbKLijjWKhShQqgVWnUwufQLmrQkKS6OgmvBwY/FqoOLs64OroIg+AHi7OCk6CIl/i8ptIjx4Lgf7+497t4BQqvKNDOQADTdMjKppJjLr4rBVwQQwhAERGVm1uckKQ3P8XUPH1/v4jzL+9yfY0AtmAzwicQJVjcs4g3imU2rznmfOMLKskp8Tjxh0AWJH7muuPzGueSwwDMjRjYzTxwhFks9rPQwKxsa8TRxTNV0yhdyLquctzhr1Qbr3JO/MFzQV5a5TnMUKSxiCRJEKGiggiosxGnVSTGRof2kh3/E8UvkUshVASPHAmrQIDt+8D/43a1ZnJp0k8JJoO/Ftj/GgOAu0G7a9vexbbdPAP8zcKV3/bUWMPtJerOrxY6AwW3g4rqrKXvA5Q4QfarLhuxIfppCsQi8n9E35YHhW6B/ze2ts4/TByBLXaVvgINDYLxE2ese7w719vbvmU5/PycecohsjayNAAAACXBIWXMAAC4jAAAuIwF4pT92AAAAB3RJTUUH6AQIEQMnlTSSjwAAABl0RVh0Q29tbWVudABDcmVhdGVkIHdpdGggR0lNUFeBDhcAAAASSURBVDjLY2AYBaNgFIyCoQsABMQAAeRw1DoAAAAASUVORK5CYII="; static WARMUP_IMAGE_BASE64 : &str = "iVBORw0KGgoAAAANSUhEUgAAACgAAAAoCAAAAACpleexAAAGc0lEQVR4nAFoBpf5AINHnT9oHGHwxejPBqNS161/mUe+CNEM2ZjIb1zZ+/ygXl5vkP9T6lgA+jwpw1IgNJCtWJkY7CNQhfuiZBrm+8cUVPW10g4TQIkAS4kJ+6A5qQEfgO5HoId0MVcs4gEcIILe/jMpXbwGCgORcpeGMh0ANY9Kk2EU3Wh6BMR39APvsV0KW8yzEbc5e8JtBWUhs4tHYZ4XhUg5RUVvh9Tl4/FuaOMEgANG4gnASCYBPa1f5p/+uFPqsTWWEqrrQfbI4HsbRHF0smMiTwTKvQJg0zxGRc8muP0ZnqI1cvgtySZP25FY606dWen/hyiwFpcHuy+6xCKCAszDRskWmbjzRyaAb3I5fR+GeB66N5AnNQJ2GCwhYsQJifZBCw7hsxQAcSxA0TMqUCwvxxa+WbWItKBdDUFuBj6TkgcC9letT2IKSPHMr0tLxwFK76OfHulksUmKDPKS+BgDhbKv88I6GqwdMxapuKaqXh04MKP/xuCOAIR2TZzfBY/vtEJO5hSCjtcVqGA+1VQAOocwBA41Vil7/xK32enA6cUCLM7864NqqADVS45Fc6M2/NjrxCcjwL+2tHI3Bs/sOtf3hjETiP0MGQJI/LP+3Cx9SGzwgF/gtNBJLROxfUkaX+Y3AvmMd/IEuQm0EWYv7c8TAGSNTNdgALrJdPUqCo7+B4lGDX+0mfiIVcUS/bQyRvKdBIxydsNe1fEAHhN2FlGUa1YlbdgQeLRW/rjMZc9YqsnGCIZE4krYv80pEVL+bUzMEAQ8HvAODK/TBsK7ba1Sn0HBh6IwLTVEb6idIwi2jKDfpyQx/U+obVDMBFbaVWPV/JkSWtqUqbon7XYOd8wCNJQWDglmxavw51+FRT1u/cwKKHwAM3cdszRLianvQ8QM3Dp+3iEpvT79x6Zcql+SGRHLAd+BUQAnve2eKwRkk7DB5P9iN/LIlXMgxzdcDii2nmYjMhyB3yVbP6bXHEtIo/WYsP+dBEuAtOQBeBgIpwHaFgYwVCEQ6pGU0lZC0iPh86uzzpPWe/I8q2Dc4CEA6Pnz6QnU1ujnzYsuJ0EptCUWTfcSxccu5tCSHH0jTQgrG6/nDlsrMAQCOj1FyhHoXS9GUWxwU/sV8AFHduFF6okKq5vlX4lRvj1+CNMcHOlnAF2Yui1hfSwSXt8MjPstlEj5TPf7li7JBRKvSjDgAaVcm4EFLmxMLmcCTyE/LivPSEOexyzKp8O/MIPLAiQ/4onPEb2s8ESAQBILVhvlqSB1rwCGH1xEePawEple0P9023HYGrvujCDUMlSdgpB/71rfEfTjgY8uHFgPAPdla93mIt7/uWNathN5EUaD8QOzOcQ541UomYbKERPqRjqSBA7PK0sCz8zpLAeMNVI9+Rx/eQeo5A7FeI7wMQuyYCAaxx9u3GfRqMSXUqTwsQAlQ6dAnhSiTQqzy0ivVCrFhIoWciWl/8+6meceIuJ6rvRqJ/GswyeSBP5AMMelxb2MN7XoaXqL9HEvIC1FUhTrHk5xvA1GSOlTSvOHnvxb5c8Behci1Cburu3pF201hAMQNLcjHgmwnnpZFoPvBKpCDLxXVadIjdO4bwLMOtWuExNtJ3x0qJ1yMWIPuZOL20FJNGXcO2f51co5uhjTk52RSDDEAryv3E7i0xHi7eq3Mh/5u1/Vitqez8PoMq3b5/BVpCFvII4YrYqfi5EBaCSvfvWJ4L3E7/h4Fmk+EbAV4ZzqyAZjeNEa+FbDSplFljz49sIczQJBBHrd0OaqYGLf+nKY7SL6EWG1aBBuPayoNRNxflhTdPYLoz4N5EDVAgwNlJXD3/gcnpE9UizPIauZNhP/1rcnE7gNOdwJ4dyZZFQcJSLWH1kC5xOZ9ls7GIUv4BctR2o73VVy0zICLkRku+34Y/6YXywzG2t4adZW5QK7WqesdSGuIXXaws/lLujr2ujEgzRFdt9p/gyGFUgsY8YH2x3pGCEOAu5G62sOGAbM5vK88t8zqfDdNCPweZhVJ0cHNw5vC3Lims1435Q+DXwE+K7yjXFolURnHhsUUEoUV/u+9kJD0kPUf8vCjB/3IlJNjcrvEuu+NwKVe83Xd9r6ltsqgv614BHA9QOqO9Itnu7PiQPNzWqrV9r269A7rW1rAUmrqu5+w8XzfuT3s6wszZH5xPXovR85dhN0TXjzBo2PIhc38srv5yopvBBpp115NAAAAABJRU5ErkJggg==";
pub type Result<T> = std::result::Result<T, ClientError>; pub type Result<T> = std::result::Result<T, ClientError>;

View File

@ -63,6 +63,6 @@ impl From<transport::Error> for ClientError {
} }
} }
static WARMUP_IMAGE_BASE64 :&str = "iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAIAAAAC64paAAABg2lDQ1BJQ0MgcHJvZmlsZQAAKJF9kT1Iw0AcxV/TSotUROxQxCFDdbKLijjWKhShQqgVWnUwufQLmrQkKS6OgmvBwY/FqoOLs64OroIg+AHi7OCk6CIl/i8ptIjx4Lgf7+497t4BQqvKNDOQADTdMjKppJjLr4rBVwQQwhAERGVm1uckKQ3P8XUPH1/v4jzL+9yfY0AtmAzwicQJVjcs4g3imU2rznmfOMLKskp8Tjxh0AWJH7muuPzGueSwwDMjRjYzTxwhFks9rPQwKxsa8TRxTNV0yhdyLquctzhr1Qbr3JO/MFzQV5a5TnMUKSxiCRJEKGiggiosxGnVSTGRof2kh3/E8UvkUshVASPHAmrQIDt+8D/43a1ZnJp0k8JJoO/Ftj/GgOAu0G7a9vexbbdPAP8zcKV3/bUWMPtJerOrxY6AwW3g4rqrKXvA5Q4QfarLhuxIfppCsQi8n9E35YHhW6B/ze2ts4/TByBLXaVvgINDYLxE2ese7w719vbvmU5/PycecohsjayNAAAACXBIWXMAAC4jAAAuIwF4pT92AAAAB3RJTUUH6AQIEQMnlTSSjwAAABl0RVh0Q29tbWVudABDcmVhdGVkIHdpdGggR0lNUFeBDhcAAAASSURBVDjLY2AYBaNgFIyCoQsABMQAAeRw1DoAAAAASUVORK5CYII="; static WARMUP_IMAGE_BASE64 : &str = "iVBORw0KGgoAAAANSUhEUgAAACgAAAAoCAAAAACpleexAAAGc0lEQVR4nAFoBpf5AINHnT9oHGHwxejPBqNS161/mUe+CNEM2ZjIb1zZ+/ygXl5vkP9T6lgA+jwpw1IgNJCtWJkY7CNQhfuiZBrm+8cUVPW10g4TQIkAS4kJ+6A5qQEfgO5HoId0MVcs4gEcIILe/jMpXbwGCgORcpeGMh0ANY9Kk2EU3Wh6BMR39APvsV0KW8yzEbc5e8JtBWUhs4tHYZ4XhUg5RUVvh9Tl4/FuaOMEgANG4gnASCYBPa1f5p/+uFPqsTWWEqrrQfbI4HsbRHF0smMiTwTKvQJg0zxGRc8muP0ZnqI1cvgtySZP25FY606dWen/hyiwFpcHuy+6xCKCAszDRskWmbjzRyaAb3I5fR+GeB66N5AnNQJ2GCwhYsQJifZBCw7hsxQAcSxA0TMqUCwvxxa+WbWItKBdDUFuBj6TkgcC9letT2IKSPHMr0tLxwFK76OfHulksUmKDPKS+BgDhbKv88I6GqwdMxapuKaqXh04MKP/xuCOAIR2TZzfBY/vtEJO5hSCjtcVqGA+1VQAOocwBA41Vil7/xK32enA6cUCLM7864NqqADVS45Fc6M2/NjrxCcjwL+2tHI3Bs/sOtf3hjETiP0MGQJI/LP+3Cx9SGzwgF/gtNBJLROxfUkaX+Y3AvmMd/IEuQm0EWYv7c8TAGSNTNdgALrJdPUqCo7+B4lGDX+0mfiIVcUS/bQyRvKdBIxydsNe1fEAHhN2FlGUa1YlbdgQeLRW/rjMZc9YqsnGCIZE4krYv80pEVL+bUzMEAQ8HvAODK/TBsK7ba1Sn0HBh6IwLTVEb6idIwi2jKDfpyQx/U+obVDMBFbaVWPV/JkSWtqUqbon7XYOd8wCNJQWDglmxavw51+FRT1u/cwKKHwAM3cdszRLianvQ8QM3Dp+3iEpvT79x6Zcql+SGRHLAd+BUQAnve2eKwRkk7DB5P9iN/LIlXMgxzdcDii2nmYjMhyB3yVbP6bXHEtIo/WYsP+dBEuAtOQBeBgIpwHaFgYwVCEQ6pGU0lZC0iPh86uzzpPWe/I8q2Dc4CEA6Pnz6QnU1ujnzYsuJ0EptCUWTfcSxccu5tCSHH0jTQgrG6/nDlsrMAQCOj1FyhHoXS9GUWxwU/sV8AFHduFF6okKq5vlX4lRvj1+CNMcHOlnAF2Yui1hfSwSXt8MjPstlEj5TPf7li7JBRKvSjDgAaVcm4EFLmxMLmcCTyE/LivPSEOexyzKp8O/MIPLAiQ/4onPEb2s8ESAQBILVhvlqSB1rwCGH1xEePawEple0P9023HYGrvujCDUMlSdgpB/71rfEfTjgY8uHFgPAPdla93mIt7/uWNathN5EUaD8QOzOcQ541UomYbKERPqRjqSBA7PK0sCz8zpLAeMNVI9+Rx/eQeo5A7FeI7wMQuyYCAaxx9u3GfRqMSXUqTwsQAlQ6dAnhSiTQqzy0ivVCrFhIoWciWl/8+6meceIuJ6rvRqJ/GswyeSBP5AMMelxb2MN7XoaXqL9HEvIC1FUhTrHk5xvA1GSOlTSvOHnvxb5c8Behci1Cburu3pF201hAMQNLcjHgmwnnpZFoPvBKpCDLxXVadIjdO4bwLMOtWuExNtJ3x0qJ1yMWIPuZOL20FJNGXcO2f51co5uhjTk52RSDDEAryv3E7i0xHi7eq3Mh/5u1/Vitqez8PoMq3b5/BVpCFvII4YrYqfi5EBaCSvfvWJ4L3E7/h4Fmk+EbAV4ZzqyAZjeNEa+FbDSplFljz49sIczQJBBHrd0OaqYGLf+nKY7SL6EWG1aBBuPayoNRNxflhTdPYLoz4N5EDVAgwNlJXD3/gcnpE9UizPIauZNhP/1rcnE7gNOdwJ4dyZZFQcJSLWH1kC5xOZ9ls7GIUv4BctR2o73VVy0zICLkRku+34Y/6YXywzG2t4adZW5QK7WqesdSGuIXXaws/lLujr2ujEgzRFdt9p/gyGFUgsY8YH2x3pGCEOAu5G62sOGAbM5vK88t8zqfDdNCPweZhVJ0cHNw5vC3Lims1435Q+DXwE+K7yjXFolURnHhsUUEoUV/u+9kJD0kPUf8vCjB/3IlJNjcrvEuu+NwKVe83Xd9r6ltsqgv614BHA9QOqO9Itnu7PiQPNzWqrV9r269A7rW1rAUmrqu5+w8XzfuT3s6wszZH5xPXovR85dhN0TXjzBo2PIhc38srv5yopvBBpp115NAAAAABJRU5ErkJggg==";
pub type Result<T> = std::result::Result<T, ClientError>; pub type Result<T> = std::result::Result<T, ClientError>;

View File

@ -62,6 +62,6 @@ impl From<Chunk> for InputChunk {
} }
} }
static WARMUP_IMAGE_BASE64 :&str = "iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAIAAAAC64paAAABg2lDQ1BJQ0MgcHJvZmlsZQAAKJF9kT1Iw0AcxV/TSotUROxQxCFDdbKLijjWKhShQqgVWnUwufQLmrQkKS6OgmvBwY/FqoOLs64OroIg+AHi7OCk6CIl/i8ptIjx4Lgf7+497t4BQqvKNDOQADTdMjKppJjLr4rBVwQQwhAERGVm1uckKQ3P8XUPH1/v4jzL+9yfY0AtmAzwicQJVjcs4g3imU2rznmfOMLKskp8Tjxh0AWJH7muuPzGueSwwDMjRjYzTxwhFks9rPQwKxsa8TRxTNV0yhdyLquctzhr1Qbr3JO/MFzQV5a5TnMUKSxiCRJEKGiggiosxGnVSTGRof2kh3/E8UvkUshVASPHAmrQIDt+8D/43a1ZnJp0k8JJoO/Ftj/GgOAu0G7a9vexbbdPAP8zcKV3/bUWMPtJerOrxY6AwW3g4rqrKXvA5Q4QfarLhuxIfppCsQi8n9E35YHhW6B/ze2ts4/TByBLXaVvgINDYLxE2ese7w719vbvmU5/PycecohsjayNAAAACXBIWXMAAC4jAAAuIwF4pT92AAAAB3RJTUUH6AQIEQMnlTSSjwAAABl0RVh0Q29tbWVudABDcmVhdGVkIHdpdGggR0lNUFeBDhcAAAASSURBVDjLY2AYBaNgFIyCoQsABMQAAeRw1DoAAAAASUVORK5CYII="; static WARMUP_IMAGE_BASE64 : &str = "iVBORw0KGgoAAAANSUhEUgAAACgAAAAoCAAAAACpleexAAAGc0lEQVR4nAFoBpf5AINHnT9oHGHwxejPBqNS161/mUe+CNEM2ZjIb1zZ+/ygXl5vkP9T6lgA+jwpw1IgNJCtWJkY7CNQhfuiZBrm+8cUVPW10g4TQIkAS4kJ+6A5qQEfgO5HoId0MVcs4gEcIILe/jMpXbwGCgORcpeGMh0ANY9Kk2EU3Wh6BMR39APvsV0KW8yzEbc5e8JtBWUhs4tHYZ4XhUg5RUVvh9Tl4/FuaOMEgANG4gnASCYBPa1f5p/+uFPqsTWWEqrrQfbI4HsbRHF0smMiTwTKvQJg0zxGRc8muP0ZnqI1cvgtySZP25FY606dWen/hyiwFpcHuy+6xCKCAszDRskWmbjzRyaAb3I5fR+GeB66N5AnNQJ2GCwhYsQJifZBCw7hsxQAcSxA0TMqUCwvxxa+WbWItKBdDUFuBj6TkgcC9letT2IKSPHMr0tLxwFK76OfHulksUmKDPKS+BgDhbKv88I6GqwdMxapuKaqXh04MKP/xuCOAIR2TZzfBY/vtEJO5hSCjtcVqGA+1VQAOocwBA41Vil7/xK32enA6cUCLM7864NqqADVS45Fc6M2/NjrxCcjwL+2tHI3Bs/sOtf3hjETiP0MGQJI/LP+3Cx9SGzwgF/gtNBJLROxfUkaX+Y3AvmMd/IEuQm0EWYv7c8TAGSNTNdgALrJdPUqCo7+B4lGDX+0mfiIVcUS/bQyRvKdBIxydsNe1fEAHhN2FlGUa1YlbdgQeLRW/rjMZc9YqsnGCIZE4krYv80pEVL+bUzMEAQ8HvAODK/TBsK7ba1Sn0HBh6IwLTVEb6idIwi2jKDfpyQx/U+obVDMBFbaVWPV/JkSWtqUqbon7XYOd8wCNJQWDglmxavw51+FRT1u/cwKKHwAM3cdszRLianvQ8QM3Dp+3iEpvT79x6Zcql+SGRHLAd+BUQAnve2eKwRkk7DB5P9iN/LIlXMgxzdcDii2nmYjMhyB3yVbP6bXHEtIo/WYsP+dBEuAtOQBeBgIpwHaFgYwVCEQ6pGU0lZC0iPh86uzzpPWe/I8q2Dc4CEA6Pnz6QnU1ujnzYsuJ0EptCUWTfcSxccu5tCSHH0jTQgrG6/nDlsrMAQCOj1FyhHoXS9GUWxwU/sV8AFHduFF6okKq5vlX4lRvj1+CNMcHOlnAF2Yui1hfSwSXt8MjPstlEj5TPf7li7JBRKvSjDgAaVcm4EFLmxMLmcCTyE/LivPSEOexyzKp8O/MIPLAiQ/4onPEb2s8ESAQBILVhvlqSB1rwCGH1xEePawEple0P9023HYGrvujCDUMlSdgpB/71rfEfTjgY8uHFgPAPdla93mIt7/uWNathN5EUaD8QOzOcQ541UomYbKERPqRjqSBA7PK0sCz8zpLAeMNVI9+Rx/eQeo5A7FeI7wMQuyYCAaxx9u3GfRqMSXUqTwsQAlQ6dAnhSiTQqzy0ivVCrFhIoWciWl/8+6meceIuJ6rvRqJ/GswyeSBP5AMMelxb2MN7XoaXqL9HEvIC1FUhTrHk5xvA1GSOlTSvOHnvxb5c8Behci1Cburu3pF201hAMQNLcjHgmwnnpZFoPvBKpCDLxXVadIjdO4bwLMOtWuExNtJ3x0qJ1yMWIPuZOL20FJNGXcO2f51co5uhjTk52RSDDEAryv3E7i0xHi7eq3Mh/5u1/Vitqez8PoMq3b5/BVpCFvII4YrYqfi5EBaCSvfvWJ4L3E7/h4Fmk+EbAV4ZzqyAZjeNEa+FbDSplFljz49sIczQJBBHrd0OaqYGLf+nKY7SL6EWG1aBBuPayoNRNxflhTdPYLoz4N5EDVAgwNlJXD3/gcnpE9UizPIauZNhP/1rcnE7gNOdwJ4dyZZFQcJSLWH1kC5xOZ9ls7GIUv4BctR2o73VVy0zICLkRku+34Y/6YXywzG2t4adZW5QK7WqesdSGuIXXaws/lLujr2ujEgzRFdt9p/gyGFUgsY8YH2x3pGCEOAu5G62sOGAbM5vK88t8zqfDdNCPweZhVJ0cHNw5vC3Lims1435Q+DXwE+K7yjXFolURnHhsUUEoUV/u+9kJD0kPUf8vCjB/3IlJNjcrvEuu+NwKVe83Xd9r6ltsqgv614BHA9QOqO9Itnu7PiQPNzWqrV9r269A7rW1rAUmrqu5+w8XzfuT3s6wszZH5xPXovR85dhN0TXjzBo2PIhc38srv5yopvBBpp115NAAAAABJRU5ErkJggg==";
pub type Result<T> = std::result::Result<T, ClientError>; pub type Result<T> = std::result::Result<T, ClientError>;

View File

@ -0,0 +1,38 @@
import pytest
@pytest.fixture(scope="module")
def flash_qwen2_vl_handle(launcher):
with launcher(
"Qwen/Qwen2-VL-2B-Instruct",
max_input_tokens=40,
max_batch_prefill_tokens=50,
max_total_tokens=51,
) as handle:
yield handle
@pytest.fixture(scope="module")
async def flash_qwen2(flash_qwen2_vl_handle):
await flash_qwen2_vl_handle.health(300)
return flash_qwen2_vl_handle.client
@pytest.mark.private
async def test_flash_qwen2_vl_simple(flash_qwen2, response_snapshot):
response = await flash_qwen2.chat(
max_tokens=20,
seed=42,
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "What is the color of the sky?"},
],
},
],
)
assert response.choices[0].message.content == "The correct answer is: blue"
assert response == response_snapshot

View File

@ -533,6 +533,7 @@ class Qwen2VLForConditionalGeneration(nn.Module):
).squeeze(0) ).squeeze(0)
inputs_embeds[input_ids == self.image_token_id] = image_embeds inputs_embeds[input_ids == self.image_token_id] = image_embeds
max_s = max(max_s, inputs_embeds.size(0))
hidden_states = self.text_model( hidden_states = self.text_model(
inputs_embeds=inputs_embeds, inputs_embeds=inputs_embeds,
position_ids=position_ids, position_ids=position_ids,

View File

@ -229,14 +229,6 @@ class VlmCausalLMBatch(FlashCausalLMBatch):
pass pass
elif chunk_type == "image": elif chunk_type == "image":
image = Image.open(BytesIO(chunk.image.data)) image = Image.open(BytesIO(chunk.image.data))
# qwen2_vl expects images to be greater than 20 pixels, this is for warmup since the
# default warmup image is 20x20
if config.model_type == "qwen2_vl":
if image.width <= 20:
w = image.width * 2
h = image.height * 2
image = image.resize((w, h))
if config.model_type == "llava_next": if config.model_type == "llava_next":
images.append(image) images.append(image)
else: else: