This commit is contained in:
deng 2023-03-29 08:45:56 +08:00
commit c7af8b2022
29 changed files with 282 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.DS_Store

2
.prefectignore Normal file
View File

@ -0,0 +1,2 @@
service
docker-compose.yaml

25
README.md Normal file
View File

@ -0,0 +1,25 @@
# Abstract
This is a demo to use [Prefect 2.0](https://github.com/PrefectHQ/prefect) and [Minio](https://github.com/minio/minio) to build a service for data pipeline orchestration🎼
# Requirements
* MacOS 12.5
* Docker 20.10
# Dirs
* **service**
* House Prefect server data and Minio storage
* **env**
* **prefect.yaml**
* conda env yaml to run this repo
# Files
* **docker-compose.yaml**
* a yaml to apply docker-compose to start data pipeline service with basic configuration (run ```docker-compose -f docker-compose.yaml up```)
* **build_flow.py**
* a script to create a small flow and deploy it to the data pipeline server
###### tags: `Data pipeline` `Object Storage` `Data Engineering`

82
build_flow.py Normal file
View File

@ -0,0 +1,82 @@
# build_flow.py
#
# author: deng
# date : 20230328
import time
from prefect import flow
from prefect import task
from prefect import get_run_logger
from prefect.deployments import Deployment
from prefect.filesystems import RemoteFileSystem
from prefect.infrastructure import DockerContainer
from prefect.task_runners import ConcurrentTaskRunner
@task(name='stop_at_floor',
description='moving stage',
retries=0,
retry_delay_seconds=1)
def stop_at_floor(floor: int) -> None:
logger = get_run_logger()
logger.info(f'elevator moving to floor {floor}')
time.sleep(floor)
logger.info(f'elevator stops on floor {floor}')
@flow(name='elevator',
description='this is a cute elevator',
task_runner=ConcurrentTaskRunner(),
timeout_seconds=60)
def elevator():
for floor in range(5, 0, -1):
stop_at_floor.submit(floor)
def build_deployment() -> None:
""" Deploy flow to docker-based Prefect server """
infra_block = DockerContainer(
image='test_prefect:20230328-1712',
image_pull_policy='IF_NOT_PRESENT',
auto_remove=True,
networks=['prefect'],
env={
'USE_SSL': False,
'AWS_ACCESS_KEY_ID': 'root',
'AWS_SECRET_ACCESS_KEY': 'minio_password',
'ENDPOINT_URL': 'http://127.0.0.1:9000'
}
)
storage_block = RemoteFileSystem(
basepath='s3://prefect-deployment/test_prefect',
settings={
'use_ssl': False,
'key': 'root',
'secret': 'minio_password',
'client_kwargs': {'endpoint_url': 'http://127.0.0.1:9000'}
}
)
infra_block.save('test-prefect-infra', overwrite=True)
storage_block.save('test-prefect-storage', overwrite=True)
deployment = Deployment.build_from_flow(
flow=elevator,
name='test_prefect',
output='test_prefect-deployment.yaml',
tags=['test_prefect'],
work_queue_name='default',
parameters={},
infrastructure=infra_block,
storage=storage_block
)
deployment.apply()
if __name__ == '__main__':
build_deployment()

71
docker-compose.yaml Normal file
View File

@ -0,0 +1,71 @@
version: '3.7'
networks:
prefect:
name: prefect
services:
minio:
image: minio/minio:latest
restart: always
environment:
MINIO_ROOT_USER: root
MINIO_ROOT_PASSWORD: minio_password
ports:
- 9000:9000
- 9001:9001
volumes:
- ~/python/test_prefect/service/minio:/data
command:
- server
- --console-address
- :9001
- /data
networks:
- prefect
prefect-server:
image: prefecthq/prefect:2.8.7-python3.10
restart: always
ports:
- 4200:4200
volumes:
- ~/python/test_prefect/service/prefect:/root/.prefect
healthcheck:
test: ["CMD", "curl", "-f", "http://0.0.0.0:4200"]
interval: 30s
timeout: 10s
retries: 3
start_period: 20s
command: bash -c "apt update && apt install -y curl && prefect orion start"
depends_on:
minio:
condition: service_started
environment:
PREFECT_DEBUG_MODE: 'True'
PREFECT_ORION_API_HOST: 0.0.0.0
PREFECT_ORION_API_PORT: 4200
PREFECT_API_URL: http://127.0.0.1:4200/api
networks:
- prefect
prefect-agent:
image: prefecthq/prefect:2.8.7-python3.10
restart: always
volumes:
- /var/run/docker.sock:/var/run/docker.sock
command:
- prefect
- agent
- start
- -q
- default
depends_on:
prefect-server:
condition: service_started
environment:
PREFECT_API_URL: http://prefect-server:4200/api
PREFECT_LOGGING_LEVEL: DEBUG
DOCKER_HOST: unix://var/run/docker.sock
networks:
- prefect

6
env/Dockerfile vendored Normal file
View File

@ -0,0 +1,6 @@
FROM python:3.10-slim
WORKDIR /prefect
RUN pip install s3fs
RUN pip install prefect==2.8.7

8
env/build_infra.sh vendored Executable file
View File

@ -0,0 +1,8 @@
#!/bin/bash
# build_infra.sh
#
# author: deng
# date : 20230328
buildtime=$(date +"%Y%m%d-%H%M")
docker build --no-cache -f Dockerfile -t test_prefect:${buildtime} .

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1 @@
{"version":"1","format":"xl-single","id":"d3e5292e-7b32-44b2-b4ec-545e359fc2db","xl":{"version":"3","this":"88e54d68-0e10-4257-825d-eab6afe6bf5c","sets":[["88e54d68-0e10-4257-825d-eab6afe6bf5c"]],"distributionAlgo":"SIPMOD+PARITY"}}

Binary file not shown.

View File

@ -0,0 +1 @@
block_auto_registration = "0016c72a6d80786d0b6c25d0bf5fed0752a58b96c7b3643cd9c27c2b250c6a75"

BIN
service/prefect/prefect.db Normal file

Binary file not shown.

15
test_asyncio.py Normal file
View File

@ -0,0 +1,15 @@
# test_asyncio.py
#
# author: deng
# date : 20230328
import asyncio
async def main():
await asyncio.sleep(1)
print('hello')
if __name__ == '__main__':
asyncio.run(main())

View File

@ -0,0 +1,70 @@
###
### A complete description of a Prefect Deployment for flow 'elevator'
###
name: test_prefect
description: this is a cute elevator
version: 44e21b51f98000011b5960b821f33b31
# The work queue that will handle this deployment's runs
work_queue_name: default
work_pool_name: default-agent-pool
tags:
- test_prefect
parameters: {}
schedule: null
is_schedule_active: true
infra_overrides: {}
###
### DO NOT EDIT BELOW THIS LINE
###
flow_name: elevator
manifest_path: null
infrastructure:
type: docker-container
env:
USE_SSL: 'False'
AWS_ACCESS_KEY_ID: root
AWS_SECRET_ACCESS_KEY: minio_password
ENDPOINT_URL: http://127.0.0.1:9000
labels: {}
name: null
command: null
image: test_prefect:20230328-1712
image_pull_policy: IF_NOT_PRESENT
image_registry: null
networks:
- prefect
network_mode: null
auto_remove: true
volumes: []
stream_output: true
memswap_limit: null
mem_limit: null
privileged: false
_is_anonymous: false
_block_document_name: test-prefec-infra
_block_document_id: c0336aea-3d1e-4c77-b2fe-66e53263a1cc
block_type_slug: docker-container
_block_type_slug: docker-container
storage:
basepath: s3://prefect-deployment/test_prefect
settings:
use_ssl: false
key: root
secret: minio_password
client_kwargs:
endpoint_url: http://127.0.0.1:9000
_is_anonymous: false
_block_document_name: test-prefect-storage
_block_document_id: 9d763296-4114-4a72-8fa7-c9c7c2464b9c
block_type_slug: remote-file-system
_block_type_slug: remote-file-system
path: null
entrypoint: build_flow.py:elevator
parameter_openapi_schema:
title: Parameters
type: object
properties: {}
required: null
definitions: null
timestamp: '2023-03-28T09:22:52.437835+00:00'