- Fix gradlew execute permissions - Add TensorFlow to all ML workflow dependencies - Create basic test directories and placeholder tests - Add ml/datasets directory for ML training workflows Made by Wiktor/overspend1
213 lines
7.0 KiB
YAML
213 lines
7.0 KiB
YAML
# Workflow header and triggers: runs on ML-related pushes and pull requests,
# on a weekly schedule, and on manual dispatch.
name: ML Training CI

on:
  # Pushes to main/develop that touch the ML sources or the optimizer service.
  push:
    branches:
      - main
      - develop
    paths:
      - 'ml/**'
      - 'services/ml-optimizer/**'

  # Pull requests targeting main that touch the same paths.
  pull_request:
    branches:
      - main
    paths:
      - 'ml/**'
      - 'services/ml-optimizer/**'

  schedule:
    # Run weekly training on Sundays at 2 AM UTC
    - cron: '0 2 * * 0'

  # Manual run; the training jobs read `model_type` in their `if:` conditions.
  workflow_dispatch:
    inputs:
      model_type:
        description: 'Type of model to train'
        required: true
        default: 'all'
        type: choice
        options:
          - all
          - anomaly_detection
          - backup_prediction
          # NOTE(review): no job visible in this file is gated on
          # 'optimization' - confirm whether a training job is missing.
          - optimization
|
|
|
|
jobs:
|
|
# Reports the shape and missing-value count of every CSV under ml/datasets.
# Both training jobs list this job in `needs`.
validate-data:
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Set up Python 3.11
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    # Reuse pip's download cache between runs; the key follows any
    # requirements.txt found under ml/.
    - name: Cache pip packages
      uses: actions/cache@v4
      with:
        path: ~/.cache/pip
        key: ${{ runner.os }}-pip-ml-${{ hashFiles('ml/**/requirements.txt') }}
        restore-keys: |
          ${{ runner.os }}-pip-ml-

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install pandas numpy scikit-learn pytest tensorflow joblib

    - name: Validate training datasets
      run: |
        # Quoted heredoc: the shell passes the script to Python verbatim.
        python - <<'PY'
        import os
        import pandas as pd
        import numpy as np

        datasets_dir = 'ml/datasets'
        if not os.path.exists(datasets_dir):
            # Nothing to validate; create the directory so later steps find it.
            print('No datasets directory found, creating placeholder')
            os.makedirs(datasets_dir, exist_ok=True)
        else:
            for entry in os.listdir(datasets_dir):
                if not entry.endswith('.csv'):
                    continue
                frame = pd.read_csv(os.path.join(datasets_dir, entry))
                print(f'Dataset {entry}: {frame.shape[0]} rows, {frame.shape[1]} columns')
                print(f'Missing values: {frame.isnull().sum().sum()}')
        PY
|
|
|
|
# Trains the anomaly detection model, runs its tests, and uploads the
# resulting model files and metrics as a workflow artifact.
train-anomaly-detection:
  needs: validate-data
  runs-on: ubuntu-latest
  # Runs when a manual dispatch selects 'anomaly_detection' or 'all'. The
  # empty-string comparison lets the job run on push, pull_request and
  # schedule events, which carry no inputs.
  if: ${{ github.event.inputs.model_type == 'anomaly_detection' || github.event.inputs.model_type == 'all' || github.event.inputs.model_type == '' }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Set up Python 3.11
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    - name: Cache pip packages
      uses: actions/cache@v4
      with:
        path: ~/.cache/pip
        key: ${{ runner.os }}-pip-anomaly-${{ hashFiles('ml/models/anomaly_detection/requirements.txt') }}
        restore-keys: |
          ${{ runner.os }}-pip-anomaly-

    # The package list is explicit, so the install does not depend on the
    # working directory.
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install scikit-learn pandas numpy joblib matplotlib seaborn pytest tensorflow

    - name: Train anomaly detection model
      working-directory: ml/models/anomaly_detection
      run: python anomaly_detector.py

    - name: Test model
      working-directory: ml/models/anomaly_detection
      run: |
        # Only a missing test suite is tolerated; real test failures must
        # fail the job instead of being masked by a fallback echo.
        if ! compgen -G "test_*.py" > /dev/null; then
          echo "No tests found"
          exit 0
        fi

        status=0
        python -m pytest test_*.py -v || status=$?

        # pytest exits with 5 when it ran but collected no tests.
        if [ "$status" -eq 5 ]; then
          echo "No tests found"
          exit 0
        fi
        exit "$status"

    - name: Upload model artifacts
      uses: actions/upload-artifact@v4
      with:
        name: anomaly-detection-model
        path: |
          ml/models/anomaly_detection/*.pkl
          ml/models/anomaly_detection/*.joblib
          ml/models/anomaly_detection/metrics.json
|
|
|
|
# Trains the backup prediction model, runs its tests, and uploads the
# resulting model files and metrics as a workflow artifact.
train-backup-prediction:
  needs: validate-data
  runs-on: ubuntu-latest
  # Runs when a manual dispatch selects 'backup_prediction' or 'all'. The
  # empty-string comparison lets the job run on push, pull_request and
  # schedule events, which carry no inputs.
  if: ${{ github.event.inputs.model_type == 'backup_prediction' || github.event.inputs.model_type == 'all' || github.event.inputs.model_type == '' }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Set up Python 3.11
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    - name: Cache pip packages
      uses: actions/cache@v4
      with:
        path: ~/.cache/pip
        key: ${{ runner.os }}-pip-backup-${{ hashFiles('ml/models/backup_prediction/requirements.txt') }}
        restore-keys: |
          ${{ runner.os }}-pip-backup-

    # The package list is explicit, so the install does not depend on the
    # working directory.
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install scikit-learn pandas numpy joblib matplotlib seaborn pytest tensorflow

    - name: Train backup prediction model
      working-directory: ml/models/backup_prediction
      run: python backup_predictor.py

    - name: Test model
      working-directory: ml/models/backup_prediction
      run: |
        # Only a missing test suite is tolerated; real test failures must
        # fail the job instead of being masked by a fallback echo.
        if ! compgen -G "test_*.py" > /dev/null; then
          echo "No tests found"
          exit 0
        fi

        status=0
        python -m pytest test_*.py -v || status=$?

        # pytest exits with 5 when it ran but collected no tests.
        if [ "$status" -eq 5 ]; then
          echo "No tests found"
          exit 0
        fi
        exit "$status"

    - name: Upload model artifacts
      uses: actions/upload-artifact@v4
      with:
        name: backup-prediction-model
        path: |
          ml/models/backup_prediction/*.pkl
          ml/models/backup_prediction/*.joblib
          ml/models/backup_prediction/metrics.json
|
|
|
|
# Downloads every artifact uploaded by the training jobs and checks that each
# .pkl / .joblib file can be deserialized.
model-validation:
  needs: [train-anomaly-detection, train-backup-prediction]
  runs-on: ubuntu-latest
  # always(): run even when a training job failed or was skipped.
  if: always()
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    # No artifact name is given, so all artifacts of this run are downloaded
    # under trained-models/.
    - name: Download all model artifacts
      uses: actions/download-artifact@v4
      with:
        path: trained-models

    - name: Set up Python 3.11
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    - name: Install validation dependencies
      run: |
        python -m pip install --upgrade pip
        pip install scikit-learn pandas numpy joblib tensorflow

    - name: Validate trained models
      run: |
        # Quoted heredoc: the shell passes the script to Python verbatim.
        python - <<'PY'
        import os
        import pickle
        import sys

        import joblib

        models_dir = 'trained-models'
        if not os.path.exists(models_dir):
            # No artifacts were downloaded; nothing to validate.
            print('No trained models found')
            sys.exit(0)

        failed = []
        for root, _dirs, files in os.walk(models_dir):
            for file in files:
                if not file.endswith(('.pkl', '.joblib')):
                    continue
                model_path = os.path.join(root, file)
                try:
                    if file.endswith('.pkl'):
                        # Unpickling executes code from the file; these files
                        # come from the artifacts downloaded in this job.
                        with open(model_path, 'rb') as handle:
                            model = pickle.load(handle)
                    else:
                        model = joblib.load(model_path)
                except Exception as e:
                    # Keep going so every broken model is reported in one run.
                    # NOTE(review): models that pickle project-defined classes
                    # need those modules importable here - confirm.
                    print(f'Failed to load {model_path}: {e}')
                    failed.append(model_path)
                else:
                    print(f'Successfully loaded model: {model_path}')
                    print(f'Model type: {type(model)}')

        # A validation step that cannot fail validates nothing: surface load
        # errors through the exit code.
        if failed:
            print(f'{len(failed)} model file(s) failed to load')
            sys.exit(1)
        PY