KServe Custom Resource
ServingRuntimes
python3 -m pip install kserve
If you need to use `Storage`, you must install `kserve[storage]`.
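For example:
python3 -m pip install "kserve[storage]"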
<project>
├── app/
│   ├── __init__.py
│   ├── runtime/
│   │   ├── __init__.py
│   │   ├── __main__.py
│   │   └── predictor.py
│   └── ...
└── ...
app/runtime/__main__.py
import argparse

from kserve import ModelServer, model_server
# from kserve.storage import Storage

from app.runtime.predictor import DEFAULT_MODEL_NAME, Predictor

# Extend KServe's built-in parser so the standard server flags stay available.
parser = argparse.ArgumentParser(parents=[model_server.parser])
parser.add_argument("--model_dir", required=True, help="A URI pointer to the model binary")
parser.add_argument(
    "--model_name", help="The name that the model is served under.", default=DEFAULT_MODEL_NAME
)
args, _ = parser.parse_known_args()

ModelServer().start([Predictor(args.model_name, args.model_dir)])
# When model_dir is a remote URI, download it to a local path first:
# ModelServer().start([Predictor(args.model_name, Storage.download(args.model_dir))])
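The commented-out variant covers remote model locations: as far as I know, `kserve.storage.Storage.download` resolves URIs such as `s3://`, `gs://`, or `pvc://` into a local directory and returns that path, which is then handed to the `Predictor`.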
app/runtime/predictor.py
from kserve import InferRequest, InferResponse, Model

DEFAULT_MODEL_NAME = "model"
DEFAULT_LOCAL_MODEL_DIR = "/tmp/model"


class Predictor(Model):
    def __init__(self, name: str = DEFAULT_MODEL_NAME, model_dir: str = DEFAULT_LOCAL_MODEL_DIR):
        super().__init__(name)
        self._model_dir = model_dir
        self.load()

    def load(self):
        # Load the model binary from self._model_dir here, then report ready.
        self.ready = True

    async def preprocess(
        self, payload: InferRequest, headers: dict[str, str] | None = None
    ) -> InferRequest:
        # Transform the raw request into model input here.
        return payload

    async def predict(
        self, payload: InferRequest, headers: dict[str, str] | None = None
    ) -> InferResponse:
        # Run inference here; infer_outputs takes a list of InferOutput objects.
        return InferResponse(
            response_id=payload.id,
            model_name=self.name,
            infer_outputs=[],
        )

    def postprocess(
        self, response: InferResponse, headers: dict[str, str] | None = None
    ) -> InferResponse:
        # Shape the final response returned to the client here.
        return response
python3 -m app.runtime --model_dir=<path>
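Once the server is up, you can smoke-test it over the Open Inference Protocol (v2) REST endpoint. The tensor below is illustrative, and the port and model name are the defaults from the code above; the skeleton predictor simply returns an empty output list.

curl -X POST http://localhost:8080/v2/models/model/infer \
  -H "Content-Type: application/json" \
  -d '{"inputs": [{"name": "input-0", "shape": [2], "datatype": "FP32", "data": [1.0, 2.0]}]}'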
KServe Args
- built-in flags
  - --http_port=8080
  - --grpc_port=8081
  - --workers=1: REST API processes
  - --max_threads=4: gRPC threads
  - --max_asyncio_workers=<thread>: event-loop threads, default: min(32, CPU + 4)
  - --enable_grpc=true
  - --enable_docs_url=false
  - --enable_latency_logging=true
  - --configure_logging=true
  - --log_config_file=<path>
  - --access_log_format=<format>
- custom (added in __main__.py above)
  - --model_dir=<path>
  - --model_name=<name>
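Putting the built-in and custom flags together, a local run might look like this (values are illustrative):

python3 -m app.runtime \
    --model_dir=/tmp/model \
    --model_name=model \
    --http_port=8080 \
    --workers=1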
Transformer
A Transformer is implemented the same way as a ServingRuntime, except that `predict` is left out.
from kserve import InferRequest, InferResponse, Model

DEFAULT_MODEL_NAME = "model"


class Transformer(Model):
    def __init__(self, name: str = DEFAULT_MODEL_NAME):
        super().__init__(name)
        self.ready = True  # no model to load, so mark the transformer ready immediately

    async def preprocess(
        self, payload: InferRequest, headers: dict[str, str] | None = None
    ) -> InferRequest:
        # Transform the request before it is forwarded to the predictor.
        return payload

    def postprocess(
        self, response: InferResponse, headers: dict[str, str] | None = None
    ) -> InferResponse:
        # Adjust the predictor's response before returning it to the client.
        return response
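A matching entry point can mirror `app/runtime/__main__.py`. The sketch below is assumption-heavy: the `app/transformer` package path is hypothetical, and `--predictor_host` is user-defined here as in the older KServe transformer examples (newer releases ship it in `model_server.parser`, in which case the explicit `add_argument` would conflict and should be removed). When `predict` is not overridden, the base `Model` forwards inference calls to `predictor_host`.

import argparse

from kserve import ModelServer, model_server

# Hypothetical module mirroring app/runtime/predictor.py.
from app.transformer.predictor import DEFAULT_MODEL_NAME, Transformer

parser = argparse.ArgumentParser(parents=[model_server.parser])
parser.add_argument(
    "--model_name", help="The name that the model is served under.", default=DEFAULT_MODEL_NAME
)
# Drop this line if your KServe version already defines --predictor_host.
parser.add_argument("--predictor_host", help="Host of the predictor service")
args, _ = parser.parse_known_args()

transformer = Transformer(args.model_name)
transformer.predictor_host = args.predictor_host  # predict() is forwarded here
ModelServer().start([transformer])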