"""Fit a linear regression on a one-hot-encoded feature and plot the fit.

Reads ``stage1_labels.csv``, uses every column except the last as features
and column 1 as the target, label-encodes then one-hot-encodes the features,
fits ``LinearRegression`` on a 60/40 train/test split, and plots the
training-set result.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as pt
from sklearn.linear_model import LinearRegression
# train_test_split lives in sklearn.model_selection; the old
# sklearn.cross_validation module has been removed from scikit-learn.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

data1 = pd.read_csv('stage1_labels.csv')
X = data1.iloc[:, :-1].values
y = data1.iloc[:, 1].values

# Label-encode the first feature column. Keep the 1-D integer codes around:
# they are the only representation of the feature that can go on a plot axis.
label_X = LabelEncoder()
x_codes = label_X.fit_transform(X[:, 0])
X[:, 0] = x_codes

# The original call was `OneHotEncoder(categorical_features = )`, which is a
# syntax error (and the argument no longer exists). With no arguments the
# encoder one-hot encodes every column of X.
# NOTE(review): assumes X has a single feature column -- confirm for wider CSVs.
encoder = OneHotEncoder()
X = encoder.fit_transform(X).toarray()

# Split the 1-D codes together with X and y so that all three stay row-aligned.
(X_train, X_test,
 x_codes_train, x_codes_test,
 y_train, y_test) = train_test_split(X, x_codes, y,
                                     test_size=0.4, random_state=0)

# Fitting simple regression to the training set
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Predicting the test set results
y_pred = regressor.predict(X_test)

# Visualization of the training set results.
# After one-hot encoding X_train is 2-D (one column per category), so passing
# it to scatter() against the 1-D y_train raises
# "x and y must be the same size". Plot against the 1-D codes instead.
order = np.argsort(x_codes_train)  # sort so the fitted line is drawn left to right
pt.scatter(x_codes_train, y_train, color='red')
pt.plot(x_codes_train[order], regressor.predict(X_train)[order], color='green')
# NOTE(review): title and axis labels look copied from another example --
# confirm that they describe the columns of stage1_labels.csv.
pt.title('salary vs yearExp (Training set)')
pt.xlabel('years of experience')
pt.ylabel('salary')
pt.show()
I need help understanding the error raised while executing the above code. Below is the error:
`raise ValueError("x and y must be the same size")`
I have a .csv file with 1398 rows and 2 columns. I have taken 40% of the data as the test set (y_test), as is visible in the above code.