init
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
.DS_Store
|
2
.prefectignore
Normal file
2
.prefectignore
Normal file
@ -0,0 +1,2 @@
|
||||
service
|
||||
docker-compose.yaml
|
25
README.md
Normal file
25
README.md
Normal file
@ -0,0 +1,25 @@
|
||||
# Abstract
|
||||
|
||||
This is a demo to use [Prefect 2.0](https://github.com/PrefectHQ/prefect) and [Minio](https://github.com/minio/minio) to build a service for data pipeline orchestration🎼
|
||||
|
||||
# Requirements
|
||||
|
||||
* MacOS 12.5
|
||||
* Docker 20.10
|
||||
|
||||
# Dirs
|
||||
|
||||
* **service**
|
||||
* House Prefect server data and Minio storage
|
||||
* **env**
|
||||
* **prefect.yaml**
|
||||
* conda env yaml to run this repo
|
||||
|
||||
# Files
|
||||
|
||||
* **docker-compose.yaml**
|
||||
* a yaml to apply docker-compose to start data pipeline service with basic configuration (run ```docker-compose -f docker-compose.yaml up```)
|
||||
* **build_flow.py**
|
||||
* a script to create a small flow and deploy it to the data pipeline server
|
||||
|
||||
###### tags: `Data pipeline` `Object Storage` `Data Engineering`
|
82
build_flow.py
Normal file
82
build_flow.py
Normal file
@ -0,0 +1,82 @@
|
||||
# build_flow.py
|
||||
#
|
||||
# author: deng
|
||||
# date : 20230328
|
||||
|
||||
import time
|
||||
|
||||
from prefect import flow
|
||||
from prefect import task
|
||||
from prefect import get_run_logger
|
||||
from prefect.deployments import Deployment
|
||||
from prefect.filesystems import RemoteFileSystem
|
||||
from prefect.infrastructure import DockerContainer
|
||||
from prefect.task_runners import ConcurrentTaskRunner
|
||||
|
||||
|
||||
@task(name='stop_at_floor',
|
||||
description='moving stage',
|
||||
retries=0,
|
||||
retry_delay_seconds=1)
|
||||
def stop_at_floor(floor: int) -> None:
|
||||
|
||||
logger = get_run_logger()
|
||||
|
||||
logger.info(f'elevator moving to floor {floor}')
|
||||
time.sleep(floor)
|
||||
logger.info(f'elevator stops on floor {floor}')
|
||||
|
||||
|
||||
@flow(name='elevator',
|
||||
description='this is a cute elevator',
|
||||
task_runner=ConcurrentTaskRunner(),
|
||||
timeout_seconds=60)
|
||||
def elevator():
|
||||
for floor in range(5, 0, -1):
|
||||
stop_at_floor.submit(floor)
|
||||
|
||||
|
||||
def build_deployment() -> None:
|
||||
""" Deploy flow to docker-based Prefect server """
|
||||
|
||||
infra_block = DockerContainer(
|
||||
image='test_prefect:20230328-1712',
|
||||
image_pull_policy='IF_NOT_PRESENT',
|
||||
auto_remove=True,
|
||||
networks=['prefect'],
|
||||
env={
|
||||
'USE_SSL': False,
|
||||
'AWS_ACCESS_KEY_ID': 'root',
|
||||
'AWS_SECRET_ACCESS_KEY': 'minio_password',
|
||||
'ENDPOINT_URL': 'http://127.0.0.1:9000'
|
||||
}
|
||||
)
|
||||
|
||||
storage_block = RemoteFileSystem(
|
||||
basepath='s3://prefect-deployment/test_prefect',
|
||||
settings={
|
||||
'use_ssl': False,
|
||||
'key': 'root',
|
||||
'secret': 'minio_password',
|
||||
'client_kwargs': {'endpoint_url': 'http://127.0.0.1:9000'}
|
||||
}
|
||||
)
|
||||
|
||||
infra_block.save('test-prefect-infra', overwrite=True)
|
||||
storage_block.save('test-prefect-storage', overwrite=True)
|
||||
|
||||
deployment = Deployment.build_from_flow(
|
||||
flow=elevator,
|
||||
name='test_prefect',
|
||||
output='test_prefect-deployment.yaml',
|
||||
tags=['test_prefect'],
|
||||
work_queue_name='default',
|
||||
parameters={},
|
||||
infrastructure=infra_block,
|
||||
storage=storage_block
|
||||
)
|
||||
|
||||
deployment.apply()
|
||||
|
||||
if __name__ == '__main__':
|
||||
build_deployment()
|
71
docker-compose.yaml
Normal file
71
docker-compose.yaml
Normal file
@ -0,0 +1,71 @@
|
||||
version: '3.7'
|
||||
|
||||
networks:
|
||||
prefect:
|
||||
name: prefect
|
||||
|
||||
services:
|
||||
minio:
|
||||
image: minio/minio:latest
|
||||
restart: always
|
||||
environment:
|
||||
MINIO_ROOT_USER: root
|
||||
MINIO_ROOT_PASSWORD: minio_password
|
||||
ports:
|
||||
- 9000:9000
|
||||
- 9001:9001
|
||||
volumes:
|
||||
- ~/python/test_prefect/service/minio:/data
|
||||
command:
|
||||
- server
|
||||
- --console-address
|
||||
- :9001
|
||||
- /data
|
||||
networks:
|
||||
- prefect
|
||||
|
||||
prefect-server:
|
||||
image: prefecthq/prefect:2.8.7-python3.10
|
||||
restart: always
|
||||
ports:
|
||||
- 4200:4200
|
||||
volumes:
|
||||
- ~/python/test_prefect/service/prefect:/root/.prefect
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://0.0.0.0:4200"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 20s
|
||||
command: bash -c "apt update && apt install -y curl && prefect orion start"
|
||||
depends_on:
|
||||
minio:
|
||||
condition: service_started
|
||||
environment:
|
||||
PREFECT_DEBUG_MODE: 'True'
|
||||
PREFECT_ORION_API_HOST: 0.0.0.0
|
||||
PREFECT_ORION_API_PORT: 4200
|
||||
PREFECT_API_URL: http://127.0.0.1:4200/api
|
||||
networks:
|
||||
- prefect
|
||||
|
||||
prefect-agent:
|
||||
image: prefecthq/prefect:2.8.7-python3.10
|
||||
restart: always
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
command:
|
||||
- prefect
|
||||
- agent
|
||||
- start
|
||||
- -q
|
||||
- default
|
||||
depends_on:
|
||||
prefect-server:
|
||||
condition: service_started
|
||||
environment:
|
||||
PREFECT_API_URL: http://prefect-server:4200/api
|
||||
PREFECT_LOGGING_LEVEL: DEBUG
|
||||
DOCKER_HOST: unix://var/run/docker.sock
|
||||
networks:
|
||||
- prefect
|
6
env/Dockerfile
vendored
Normal file
6
env/Dockerfile
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
FROM python:3.10-slim
|
||||
|
||||
WORKDIR /prefect
|
||||
|
||||
RUN pip install s3fs
|
||||
RUN pip install prefect==2.8.7
|
8
env/build_infra.sh
vendored
Executable file
8
env/build_infra.sh
vendored
Executable file
@ -0,0 +1,8 @@
|
||||
#!/bin/bash
|
||||
# build_infra.sh
|
||||
#
|
||||
# author: deng
|
||||
# date : 20230328
|
||||
|
||||
buildtime=$(date +"%Y%m%d-%H%M")
|
||||
docker build --no-cache -f Dockerfile -t test_prefect:${buildtime} .
|
BIN
service/minio/.minio.sys/buckets/.bloomcycle.bin/xl.meta
Normal file
BIN
service/minio/.minio.sys/buckets/.bloomcycle.bin/xl.meta
Normal file
Binary file not shown.
BIN
service/minio/.minio.sys/buckets/.usage-cache.bin/xl.meta
Normal file
BIN
service/minio/.minio.sys/buckets/.usage-cache.bin/xl.meta
Normal file
Binary file not shown.
BIN
service/minio/.minio.sys/buckets/.usage.json/xl.meta
Normal file
BIN
service/minio/.minio.sys/buckets/.usage.json/xl.meta
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
service/minio/.minio.sys/config/config.json/xl.meta
Normal file
BIN
service/minio/.minio.sys/config/config.json/xl.meta
Normal file
Binary file not shown.
BIN
service/minio/.minio.sys/config/iam/format.json/xl.meta
Normal file
BIN
service/minio/.minio.sys/config/iam/format.json/xl.meta
Normal file
Binary file not shown.
Binary file not shown.
1
service/minio/.minio.sys/format.json
Normal file
1
service/minio/.minio.sys/format.json
Normal file
@ -0,0 +1 @@
|
||||
{"version":"1","format":"xl-single","id":"d3e5292e-7b32-44b2-b4ec-545e359fc2db","xl":{"version":"3","this":"88e54d68-0e10-4257-825d-eab6afe6bf5c","sets":[["88e54d68-0e10-4257-825d-eab6afe6bf5c"]],"distributionAlgo":"SIPMOD+PARITY"}}
|
BIN
service/minio/.minio.sys/pool.bin/xl.meta
Normal file
BIN
service/minio/.minio.sys/pool.bin/xl.meta
Normal file
Binary file not shown.
BIN
service/minio/prefect-deployment/test_prefect/.gitignore/xl.meta
Normal file
BIN
service/minio/prefect-deployment/test_prefect/.gitignore/xl.meta
Normal file
Binary file not shown.
Binary file not shown.
BIN
service/minio/prefect-deployment/test_prefect/README.md/xl.meta
Normal file
BIN
service/minio/prefect-deployment/test_prefect/README.md/xl.meta
Normal file
Binary file not shown.
Binary file not shown.
BIN
service/minio/prefect-deployment/test_prefect/env/Dockerfile/xl.meta
vendored
Normal file
BIN
service/minio/prefect-deployment/test_prefect/env/Dockerfile/xl.meta
vendored
Normal file
Binary file not shown.
BIN
service/minio/prefect-deployment/test_prefect/env/build_infra.sh/xl.meta
vendored
Normal file
BIN
service/minio/prefect-deployment/test_prefect/env/build_infra.sh/xl.meta
vendored
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
1
service/prefect/memo_store.toml
Normal file
1
service/prefect/memo_store.toml
Normal file
@ -0,0 +1 @@
|
||||
block_auto_registration = "0016c72a6d80786d0b6c25d0bf5fed0752a58b96c7b3643cd9c27c2b250c6a75"
|
BIN
service/prefect/prefect.db
Normal file
BIN
service/prefect/prefect.db
Normal file
Binary file not shown.
15
test_asyncio.py
Normal file
15
test_asyncio.py
Normal file
@ -0,0 +1,15 @@
|
||||
# test_asyncio.py
|
||||
#
|
||||
# author: deng
|
||||
# date : 20230328
|
||||
|
||||
import asyncio
|
||||
|
||||
|
||||
async def main():
|
||||
await asyncio.sleep(1)
|
||||
print('hello')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
70
test_prefect-deployment.yaml
Normal file
70
test_prefect-deployment.yaml
Normal file
@ -0,0 +1,70 @@
|
||||
###
|
||||
### A complete description of a Prefect Deployment for flow 'elevator'
|
||||
###
|
||||
name: test_prefect
|
||||
description: this is a cute elevator
|
||||
version: 44e21b51f98000011b5960b821f33b31
|
||||
# The work queue that will handle this deployment's runs
|
||||
work_queue_name: default
|
||||
work_pool_name: default-agent-pool
|
||||
tags:
|
||||
- test_prefect
|
||||
parameters: {}
|
||||
schedule: null
|
||||
is_schedule_active: true
|
||||
infra_overrides: {}
|
||||
|
||||
###
|
||||
### DO NOT EDIT BELOW THIS LINE
|
||||
###
|
||||
flow_name: elevator
|
||||
manifest_path: null
|
||||
infrastructure:
|
||||
type: docker-container
|
||||
env:
|
||||
USE_SSL: 'False'
|
||||
AWS_ACCESS_KEY_ID: root
|
||||
AWS_SECRET_ACCESS_KEY: minio_password
|
||||
ENDPOINT_URL: http://127.0.0.1:9000
|
||||
labels: {}
|
||||
name: null
|
||||
command: null
|
||||
image: test_prefect:20230328-1712
|
||||
image_pull_policy: IF_NOT_PRESENT
|
||||
image_registry: null
|
||||
networks:
|
||||
- prefect
|
||||
network_mode: null
|
||||
auto_remove: true
|
||||
volumes: []
|
||||
stream_output: true
|
||||
memswap_limit: null
|
||||
mem_limit: null
|
||||
privileged: false
|
||||
_is_anonymous: false
|
||||
_block_document_name: test-prefec-infra
|
||||
_block_document_id: c0336aea-3d1e-4c77-b2fe-66e53263a1cc
|
||||
block_type_slug: docker-container
|
||||
_block_type_slug: docker-container
|
||||
storage:
|
||||
basepath: s3://prefect-deployment/test_prefect
|
||||
settings:
|
||||
use_ssl: false
|
||||
key: root
|
||||
secret: minio_password
|
||||
client_kwargs:
|
||||
endpoint_url: http://127.0.0.1:9000
|
||||
_is_anonymous: false
|
||||
_block_document_name: test-prefect-storage
|
||||
_block_document_id: 9d763296-4114-4a72-8fa7-c9c7c2464b9c
|
||||
block_type_slug: remote-file-system
|
||||
_block_type_slug: remote-file-system
|
||||
path: null
|
||||
entrypoint: build_flow.py:elevator
|
||||
parameter_openapi_schema:
|
||||
title: Parameters
|
||||
type: object
|
||||
properties: {}
|
||||
required: null
|
||||
definitions: null
|
||||
timestamp: '2023-03-28T09:22:52.437835+00:00'
|
Reference in New Issue
Block a user