TimeSeries/auto at main · swapyface/TimeSeries · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Function to flag a predicted price that lies outside the 1.5 * IQR fences of the observed prices
def is_bad_prediction(predicted_price, price_series):
    """Return True when *predicted_price* is not inside the IQR fences of *price_series*.

    The fences are ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR``, where Q1 and Q3
    are the 25th and 75th percentiles of ``price_series`` and
    ``IQR = Q3 - Q1``. Both fences are inclusive.

    Args:
        predicted_price: Scalar prediction to validate.
        price_series: Object exposing ``quantile(q)`` (e.g. a pandas Series)
            holding the observed prices.

    Returns:
        bool: True (bad prediction) if the value is below the lower fence,
        above the upper fence, or cannot be validated at all because the
        prediction or the fences are NaN (for instance when ``price_series``
        is empty). False (good prediction) otherwise.
    """
    q1 = price_series.quantile(0.25)
    q3 = price_series.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr

    # Phrase the check as "not inside the fences": every comparison with NaN
    # is False, so a missing prediction or undefined fences are reported as
    # bad instead of silently passing as good.
    return not (lower_bound <= predicted_price <= upper_bound)

# Function to apply moving average and IQR check
def moving_average_with_iqr(df, window_size=3):
    """Predict the next price per rating from a moving average and IQR-check it.

    The rows are ordered by ``'date'``, a rolling mean of ``'price'`` over
    ``window_size`` rows is computed inside each ``'rating'`` group, and the
    last available rolling mean of each group is taken as that rating's
    predicted next price. Each prediction is then passed to
    ``is_bad_prediction`` together with all prices of the same rating.

    Args:
        df: DataFrame with ``'date'``, ``'rating'`` and ``'price'`` columns.
            It is not modified: the work happens on the sorted copy returned
            by ``sort_values``, so no ``'moving_avg'`` column is added to it.
        window_size: Number of consecutive rows averaged by the rolling mean.

    Returns:
        DataFrame with one row per rating and the columns ``'rating'``,
        ``'next_predicted_price'`` and ``'is_bad_prediction'``. The frame is
        empty (same columns) when ``df`` has no rows.
    """
    result_columns = ['rating', 'next_predicted_price', 'is_bad_prediction']

    # Sort by date to ensure correct time series order (returns a new frame).
    ordered = df.sort_values(by='date')

    # Nothing to predict from. Row-wise apply() on an empty frame returns a
    # DataFrame rather than a Series, which cannot be assigned to a single
    # column, so answer explicitly instead.
    if ordered.empty:
        return pd.DataFrame(columns=result_columns)

    # Rolling mean within each rating group, aligned to the sorted rows.
    ordered['moving_avg'] = ordered.groupby('rating')['price'].transform(
        lambda prices: prices.rolling(window=window_size).mean()
    )

    # Predict the next price as the last available moving average per rating.
    next_price = ordered.groupby('rating')['moving_avg'].last().reset_index()
    next_price.columns = ['rating', 'next_predicted_price']

    # Validate every prediction against the prices observed for its rating.
    # A plain list keeps this well-defined even if no rating group survives.
    next_price['is_bad_prediction'] = [
        is_bad_prediction(
            predicted,
            ordered.loc[ordered['rating'] == rating, 'price'],
        )
        for rating, predicted in zip(
            next_price['rating'], next_price['next_predicted_price']
        )
    ]

    return next_price

# Visualization function
def plot_moving_avg_with_iqr(df, window_size=3):
    """Plot price and its moving average over time, one pair of lines per rating.

    Args:
        df: DataFrame with ``'date'``, ``'rating'`` and ``'price'`` columns.
            A ``'moving_avg'`` column is used when present; otherwise the
            moving average is derived here, so a frame that was never
            augmented with that column can still be plotted. ``df`` itself
            is not modified.
        window_size: Rolling window (in rows) used only when ``'moving_avg'``
            has to be derived.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Draw from a date-ordered copy so each line runs chronologically and the
    # rolling mean (when derived below) follows time order.
    ordered = df.sort_values(by='date')
    has_moving_avg = 'moving_avg' in ordered.columns

    _, ax = plt.subplots(figsize=(12, 6))

    # Keep the caller's order of first appearance for a stable legend.
    for rating in df['rating'].unique():
        subset = ordered[ordered['rating'] == rating]
        if has_moving_avg:
            moving_avg = subset['moving_avg']
        else:
            moving_avg = subset['price'].rolling(window=window_size).mean()

        ax.plot(subset['date'], subset['price'], label=f'Price (Rating: {rating})')
        ax.plot(subset['date'], moving_avg, linestyle='--', label=f'Moving Avg (Rating: {rating})')

    ax.set_title('Price and Moving Average for Each Rating')
    ax.set_xlabel('Date')
    ax.set_ylabel('Price')
    ax.legend()
    plt.show()

# Sample dataset (pass your own data here)
data = dict(
    date=pd.date_range(start='2023-01-01', periods=100),
    rating=np.random.choice(['A', 'B', 'C'], size=100),
    price=1000 + 100 * np.random.randn(100),
)
df = pd.DataFrame(data=data)

# Forecast the next price of every rating and flag out-of-range forecasts
window_size = 3
predictions = moving_average_with_iqr(df, window_size=window_size)

print(predictions)

# Show prices and their moving averages
plot_moving_avg_with_iqr(df)