record model input&output, save model to file system
This commit is contained in:
parent
6e80927c40
commit
7015b5c1a5
9
env.yaml
9
env.yaml
|
@ -1,6 +1,7 @@
|
|||
name: torch
|
||||
channels:
|
||||
- pytorch
|
||||
- anaconda
|
||||
- conda-forge
|
||||
dependencies:
|
||||
- alembic=1.9.4
|
||||
|
@ -8,10 +9,11 @@ dependencies:
|
|||
- appdirs=1.4.4
|
||||
- bcrypt=3.2.2
|
||||
- blinker=1.5
|
||||
- bottleneck=1.3.4
|
||||
- brotlipy=0.7.0
|
||||
- bzip2=1.0.8
|
||||
- ca-certificates=2022.12.7
|
||||
- certifi=2022.12.7
|
||||
- ca-certificates=2022.4.26
|
||||
- certifi=2022.6.15
|
||||
- cffi=1.15.1
|
||||
- charset-normalizer=2.1.1
|
||||
- click=8.1.3
|
||||
|
@ -80,6 +82,7 @@ dependencies:
|
|||
- mlflow=1.30.0
|
||||
- ncurses=6.3
|
||||
- nettle=3.8.1
|
||||
- numexpr=2.8.1
|
||||
- numpy=1.24.2
|
||||
- oauthlib=3.2.2
|
||||
- openh264=2.3.1
|
||||
|
@ -87,7 +90,7 @@ dependencies:
|
|||
- openssl=3.0.8
|
||||
- p11-kit=0.24.1
|
||||
- packaging=21.3
|
||||
- pandas=1.5.3
|
||||
- pandas=1.4.2
|
||||
- paramiko=3.0.0
|
||||
- pillow=9.4.0
|
||||
- pip=23.0.1
|
||||
|
|
11
predict.py
11
predict.py
|
@ -13,9 +13,12 @@ if __name__ == '__main__':
|
|||
mlflow.set_tracking_uri('http://127.0.0.1:5000')
|
||||
|
||||
# load production model
|
||||
model = mlflow.pytorch.load_model('models:/cls_model/production')
|
||||
model = mlflow.pytorch.load_model('models:/fortune_predict_model/production')
|
||||
|
||||
# predict
|
||||
fake_data = torch.randn(10)
|
||||
output = model(fake_data)
|
||||
print(output)
|
||||
my_personal_info = torch.randn(5)
|
||||
my_fortune = model(my_personal_info)
|
||||
print(my_fortune)
|
||||
|
||||
# save model and env to local file system
|
||||
mlflow.pytorch.save_model(model, './fortune_predict_model')
|
|
@ -4,4 +4,4 @@
|
|||
# author: deng
|
||||
# date : 20230221
|
||||
|
||||
mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./artifacts
|
||||
mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./artifacts --port 5000
|
39
train.py
39
train.py
|
@ -6,16 +6,18 @@
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.optim import SGD
|
||||
from tqdm import tqdm
|
||||
import mlflow
|
||||
from mlflow.models.signature import ModelSignature
|
||||
from mlflow.types.schema import Schema, ColSpec
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
class Net(nn.Module):
|
||||
""" define a simple neural network model """
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.fc1 = nn.Linear(10, 5)
|
||||
self.fc2 = nn.Linear(5, 1)
|
||||
self.fc1 = nn.Linear(5, 3)
|
||||
self.fc2 = nn.Linear(3, 1)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.fc1(x)
|
||||
|
@ -28,7 +30,7 @@ def train(model, dataloader, criterion, optimizer, epochs):
|
|||
""" define the training function """
|
||||
for epoch in tqdm(range(epochs), 'Epochs'):
|
||||
|
||||
for i, (inputs, labels) in enumerate(dataloader):
|
||||
for batch, (inputs, labels) in enumerate(dataloader):
|
||||
|
||||
# forwarding
|
||||
outputs = model(inputs)
|
||||
|
@ -40,7 +42,7 @@ def train(model, dataloader, criterion, optimizer, epochs):
|
|||
optimizer.step()
|
||||
|
||||
# log loss
|
||||
mlflow.log_metric('train_loss', loss.item(), step=i)
|
||||
mlflow.log_metric('train_loss', loss.item(), step=epoch)
|
||||
|
||||
return loss
|
||||
|
||||
|
@ -49,11 +51,12 @@ if __name__ == '__main__':
|
|||
|
||||
# set hyper parameters
|
||||
learning_rate = 1e-2
|
||||
batch_size = 10
|
||||
epochs = 20
|
||||
|
||||
# create a dataloader with fake data
|
||||
dataloader = [(torch.randn(10), torch.randn(1)) for _ in range(100)]
|
||||
dataloader = torch.utils.data.DataLoader(dataloader, batch_size=10)
|
||||
dataloader = [(torch.randn(5), torch.randn(1)) for _ in range(100)]
|
||||
dataloader = torch.utils.data.DataLoader(dataloader, batch_size=batch_size)
|
||||
|
||||
# create the model, criterion, and optimizer
|
||||
model = Net()
|
||||
|
@ -62,18 +65,34 @@ if __name__ == '__main__':
|
|||
|
||||
# set the tracking URI to the model registry
|
||||
mlflow.set_tracking_uri('http://127.0.0.1:5000')
|
||||
mlflow.set_experiment('train_fortune_predict_model')
|
||||
|
||||
# start the MLflow run
|
||||
# start a new MLflow run
|
||||
with mlflow.start_run():
|
||||
|
||||
# train the model and log the loss
|
||||
# train the model
|
||||
loss = train(model, dataloader, criterion, optimizer, epochs)
|
||||
|
||||
# log some additional metrics
|
||||
mlflow.log_metric('final_loss', loss.item())
|
||||
mlflow.log_param('learning_rate', learning_rate)
|
||||
mlflow.log_param('batch_size', batch_size)
|
||||
|
||||
# create a signature to record model input and output info
|
||||
input_schema = Schema([
|
||||
ColSpec('float', 'age'),
|
||||
ColSpec('float', 'mood level'),
|
||||
ColSpec('float', 'health level'),
|
||||
ColSpec('float', 'hungry level'),
|
||||
ColSpec('float', 'sexy level')
|
||||
])
|
||||
output_schema = Schema([ColSpec('float', 'fortune')])
|
||||
signature = ModelSignature(inputs=input_schema, outputs=output_schema)
|
||||
|
||||
# log trained model
|
||||
mlflow.pytorch.log_model(model, 'model')
|
||||
mlflow.pytorch.log_model(model, 'model', signature=signature)
|
||||
|
||||
# log training code
|
||||
mlflow.log_artifact('./train.py', 'code')
|
||||
|
||||
print('Completed.')
|
Loading…
Reference in New Issue