2022-10-08 10:30:12 +00:00
|
|
|
use bloom_inference_client::ShardedClient;
|
2022-10-11 08:36:51 +00:00
|
|
|
use poem::listener::TcpListener;
|
2022-10-08 10:30:12 +00:00
|
|
|
use std::time::Duration;
|
2022-10-11 14:50:54 +00:00
|
|
|
use tokenizers::Tokenizer;
|
2022-10-08 10:30:12 +00:00
|
|
|
|
2022-10-11 08:36:51 +00:00
|
|
|
mod server;
|
2022-10-11 14:50:54 +00:00
|
|
|
mod validation;
|
|
|
|
|
|
|
|
use validation::Validation;
|
2022-10-08 10:30:12 +00:00
|
|
|
|
2022-10-11 08:36:51 +00:00
|
|
|
mod db;
|
2022-10-11 14:50:54 +00:00
|
|
|
|
2022-10-08 10:30:12 +00:00
|
|
|
use db::Db;
|
|
|
|
|
2022-10-11 08:36:51 +00:00
|
|
|
mod batcher;
|
2022-10-11 14:50:54 +00:00
|
|
|
|
2022-10-11 08:36:51 +00:00
|
|
|
use batcher::Batcher;
|
2022-10-08 10:30:12 +00:00
|
|
|
|
2022-10-11 14:50:54 +00:00
|
|
|
fn main() -> Result<(), std::io::Error> {
|
|
|
|
let tokenizer = Tokenizer::from_pretrained("bigscience/bloom", None).unwrap();
|
|
|
|
|
|
|
|
tokio::runtime::Builder::new_multi_thread()
|
|
|
|
.enable_all()
|
|
|
|
.build()
|
|
|
|
.unwrap()
|
|
|
|
.block_on(async {
|
|
|
|
tracing_subscriber::fmt::init();
|
2022-10-08 10:30:12 +00:00
|
|
|
|
2022-10-11 14:50:54 +00:00
|
|
|
let sharded_client = ShardedClient::connect_uds(
|
|
|
|
"/tmp/bloom-inference-0".to_string(),
|
|
|
|
Duration::from_secs(5),
|
|
|
|
)
|
2022-10-08 10:30:12 +00:00
|
|
|
.await;
|
2022-10-11 14:50:54 +00:00
|
|
|
sharded_client
|
|
|
|
.clear_cache()
|
|
|
|
.await
|
|
|
|
.expect("Unable to clear cache");
|
|
|
|
tracing::info!("Connected");
|
2022-10-08 10:30:12 +00:00
|
|
|
|
2022-10-11 14:50:54 +00:00
|
|
|
let addr = "127.0.0.1:3000".to_string();
|
|
|
|
let listener = TcpListener::bind(addr);
|
2022-10-08 10:30:12 +00:00
|
|
|
|
2022-10-11 14:50:54 +00:00
|
|
|
server::run(sharded_client, tokenizer, listener).await
|
|
|
|
})
|
2022-10-08 10:30:12 +00:00
|
|
|
}
|