4/17/24, 11:59 AM gold - Jupyter Notebook
In [ ]: import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error
import tensorflow as tf
from keras import Model
from keras.layers import LSTM, Dropout, Dense, Input
In [ ]: df = pd.read_csv('Gold Price (2013-2023).csv')
In [ ]: df
Out[10]: Date Price Open High Low Vol. Change %
0 12/30/2022 1,826.20 1,821.80 1,832.40 1,819.80 107.50K 0.01%
1 12/29/2022 1,826.00 1,812.30 1,827.30 1,811.20 105.99K 0.56%
2 12/28/2022 1,815.80 1,822.40 1,822.80 1,804.20 118.08K -0.40%
3 12/27/2022 1,823.10 1,808.20 1,841.90 1,808.00 159.62K 0.74%
4 12/26/2022 1,809.70 1,805.80 1,811.95 1,805.55 NaN 0.30%
... ... ... ... ... ... ... ...
2578 01/08/2013 1,663.20 1,651.50 1,662.60 1,648.80 0.13K 0.97%
2579 01/07/2013 1,647.20 1,657.30 1,663.80 1,645.30 0.09K -0.16%
2580 01/04/2013 1,649.90 1,664.40 1,664.40 1,630.00 0.31K -1.53%
2581 01/03/2013 1,675.60 1,688.00 1,689.30 1,664.30 0.19K -0.85%
2582 01/02/2013 1,689.90 1,675.80 1,695.00 1,672.10 0.06K 0.78%
2583 rows × 7 columns
In [ ]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2583 entries, 0 to 2582
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 2583 non-null object
1 Price 2583 non-null object
2 Open 2583 non-null object
3 High 2583 non-null object
4 Low 2583 non-null object
5 Vol. 2578 non-null object
6 Change % 2583 non-null object
dtypes: object(7)
memory usage: 141.4+ KB
localhost:8888/notebooks/gold.ipynb 1/7
4/17/24, 11:59 AM gold - Jupyter Notebook
In [ ]: df.drop(["Vol.","Change %"],axis=1, inplace= True)
In [ ]: df['Date']=pd.to_datetime(df['Date'])
df.sort_values(by='Date',ascending=True,inplace=True)
df.reset_index(drop=True,inplace=True)
In [ ]: NumCols=df.columns.drop(['Date'])
df[NumCols]=df[NumCols].replace({',':''},regex=True)
df[NumCols]=df[NumCols].astype('float64')
In [ ]: df.head()
Out[15]: Date Price Open High Low
0 2013-01-02 1689.9 1675.8 1695.0 1672.1
1 2013-01-03 1675.6 1688.0 1689.3 1664.3
2 2013-01-04 1649.9 1664.4 1664.4 1630.0
3 2013-01-07 1647.2 1657.3 1663.8 1645.3
4 2013-01-08 1663.2 1651.5 1662.6 1648.8
In [ ]: df.duplicated().sum()
Out[16]: 0
In [ ]: df.isnull().sum()
Out[17]: Date 0
Price 0
Open 0
High 0
Low 0
dtype: int64
localhost:8888/notebooks/gold.ipynb 2/7
4/17/24, 11:59 AM gold - Jupyter Notebook
In [ ]: fig=px.line(y=df.Price,x=df.Date)
fig.update_traces(line_color="black")
fig.update_layout(xaxis_title="Date",
yaxis_title="Price",
title={'text':'Gold price Data','y':0.95,'x':0.5,'xanchor
plot_bgcolor='rgba(255,223,0,0.9)')
In [ ]: test_size=df[df.Date.dt.year==2022].shape[0]
test_size
Out[20]: 260
localhost:8888/notebooks/gold.ipynb 3/7
4/17/24, 11:59 AM gold - Jupyter Notebook
In [ ]: plt.figure(figsize=(15,6),dpi=150)
plt.rcParams['axes.facecolor']='cyan'
plt.rc('axes',edgecolor='white')
plt.plot(df.Date[:-test_size],df.Price[:-test_size],color='black',lw=2)
plt.plot(df.Date[-test_size:],df.Price[-test_size:],color='red',lw=2)
plt.title('gold price train and test',fontsize=15)
plt.xlabel('year',fontsize=12)
plt.ylabel('price',fontsize=12)
plt.legend(['train set','test set'],loc="upper left",prop={'size':15})
plt.grid(color='white')
plt.show()
In [ ]: scaler=MinMaxScaler()
scaler.fit(df.Price.values.reshape(-1,1))
Out[27]: MinMaxScaler()
In a Jupyter environment, please rerun this cell to show the HTML representation or
trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page
with nbviewer.org.
In [ ]: window_size=60
In [ ]: train_data=df.Price[:-test_size]
train_data=scaler.transform(train_data.values.reshape(-1,1))
In [ ]: X_train=[]
Y_train=[]
for i in range(window_size,len(train_data)):
X_train.append(train_data[i-60:i,0])
Y_train.append(train_data[i,0])
In [ ]: test_data=df.Price[-test_size-60:]
test_data=scaler.transform(test_data.values.reshape(-1,1))
localhost:8888/notebooks/gold.ipynb 4/7
4/17/24, 11:59 AM gold - Jupyter Notebook
In [ ]: X_test=[]
Y_test=[]
for i in range(window_size,len(test_data)):
X_test.append(test_data[i-60:i,0])
Y_test.append(test_data[i,0])
In [ ]: X_train=np.array(X_train)
Y_train=np.array(Y_train)
X_test=np.array(X_test)
Y_test=np.array(Y_test)
In [ ]: X_train=np.reshape(X_train,(X_train.shape[0],X_train.shape[1],1))
X_test=np.reshape(X_test,(X_test.shape[0],X_test.shape[1],1))
Y_train=np.reshape(Y_train,(-1,1))
Y_test=np.reshape(Y_test,(-1,1))
In [ ]: print('X_train shape;',X_train.shape)
print('Y_train shape;',Y_train.shape)
print('X_test shape;',X_test.shape)
print('Y_test shape;',Y_test.shape)
X_train shape; (2263, 60, 1)
Y_train shape; (2263, 1)
X_test shape; (260, 60, 1)
Y_test shape; (260, 1)
In [ ]: def define_model():
input1=Input(shape=(window_size,1))
x=LSTM(units=64,return_sequences=True)(input1)
x=Dropout(0.2)(x)
x=LSTM(units=64,return_sequences=True)(x)
x=Dropout(0.2)(x)
x=LSTM(units=64)(x)
x=Dense(32,activation='softmax')(x)
dnn_output=Dense(1)(x)
model=Model(inputs=input1,outputs=[dnn_output])
model.compile(loss='mean_squared_error',optimizer='Nadam')
model.summary()
return model
localhost:8888/notebooks/gold.ipynb 5/7
4/17/24, 11:59 AM gold - Jupyter Notebook
In [ ]: model=define_model()
history=model.fit(X_train,Y_train,epochs=150,batch_size=12,validation_split
Model: "model_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_3 (InputLayer) [(None, 60, 1)] 0
lstm_6 (LSTM) (None, 60, 64) 16896
dropout_4 (Dropout) (None, 60, 64) 0
lstm_7 (LSTM) (None, 60, 64) 33024
dropout_5 (Dropout) (None, 60, 64) 0
lstm_8 (LSTM) (None, 64) 33024
dense_4 (Dense) (None, 32) 2080
dense_5 (Dense) (None, 1) 33
In [ ]: result=model.evaluate(X_test,Y_test)
y_pred=model.predict(X_test)
9/9 [==============================] - 0s 26ms/step - loss: 7.6228e-04
9/9 [==============================] - 0s 27ms/step
In [ ]: MAPE=mean_absolute_percentage_error(Y_test,y_pred)
accuracy=1-MAPE
In [ ]: print('Test Loss:',result)
print('Test MAPE:',MAPE)
print('Test Accuracy:',accuracy)
Test Loss: 0.000762280891649425
Test MAPE: 0.030252884420264053
Test Accuracy: 0.9697471155797359
In [ ]: y_test_true=scaler.inverse_transform(Y_test)
y_test_pred=scaler.inverse_transform(y_pred)
localhost:8888/notebooks/gold.ipynb 6/7
4/17/24, 11:59 AM gold - Jupyter Notebook
In [ ]: plt.figure(figsize=(15,6),dpi=150)
plt.rcParams['axes.facecolor']='yellow'
plt.rc('axes',edgecolor='white')
plt.plot(df['Date'].iloc[:-test_size],scaler.inverse_transform(train_data),
plt.plot(df['Date'].iloc[-test_size:],y_test_true,color='green',lw=2)
plt.plot(df['Date'].iloc[-test_size:],y_test_pred,color='red',lw=2)
plt.title("Model Performance on Gold Price Prediction",fontsize=15)
plt.xlabel('Date',fontsize=12)
plt.ylabel('Price',fontsize=12)
plt.legend(['Training Data','Actual Test Data','Predicted Test Data'],loc='
plt.grid(color='white')
plt.show()
localhost:8888/notebooks/gold.ipynb 7/7