目录
利用有无噪音的正余弦函数理解相关性指标的不同(多图绘制Pearson系数、最大信息系数MIC)
利用有无噪音的正余弦函数理解相关性指标的不同(多图绘制Pearson系数、最大信息系数MIC)
输出结果
实现代码
# ML / MIC: use sine/cosine-style functions with and without noise to see how
# correlation measures differ (multi-panel plot of the Pearson coefficient and
# the maximal information coefficient, MIC).

import numpy as np
import matplotlib.pyplot as plt
from minepy import MINE


def mysubplot(x, y, numRows, numCols, plotNum,
              xlim=(-4, 4), ylim=(-4, 4)):
    """Scatter-plot ``y`` against ``x`` in one grid cell, titled with Pearson r and MIC.

    Args:
        x, y: Samples passed to ``np.corrcoef`` and ``MINE.compute_score``.
        numRows, numCols: Shape of the subplot grid.
        plotNum: 1-based index of the cell to draw into.
        xlim, ylim: Axis limits handed to ``plt.subplot``.

    Returns:
        The axes object created by ``plt.subplot``.
    """
    # Both statistics are rounded to one decimal, matching the title format.
    r = np.around(np.corrcoef(x, y)[0, 1], 1)
    mine = MINE(alpha=0.6, c=15)
    mine.compute_score(x, y)
    mic = np.around(mine.mic(), 1)

    ax = plt.subplot(numRows, numCols, plotNum,
                     xlim=xlim, ylim=ylim)
    ax.set_title('Pearson r=%.1f\nMIC=%.1f' % (r, mic), fontsize=10)
    # Strip frame, axes and ticks so only the point cloud and title remain.
    ax.set_frame_on(False)
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.plot(x, y, ',')
    ax.set_xticks([])
    ax.set_yticks([])
    return ax


def rotation(xy, t):
    """Right-multiply the rows of ``xy`` by the 2x2 rotation matrix for angle ``t``.

    Each row ``(x, y)`` becomes
    ``(x*cos(t) + y*sin(t), -x*sin(t) + y*cos(t))``.
    """
    return np.dot(xy, [[np.cos(t), -np.sin(t)],
                       [np.sin(t), np.cos(t)]])


def mvnormal(n=1000):
    """Fill grid cells 1-7 with bivariate normal samples of varying correlation.

    For each correlation in ``[1.0, 0.8, 0.4, 0.0, -0.4, -0.8, -1.0]``, draws
    ``n`` zero-mean, unit-variance samples and plots them in a 3x7 grid.
    """
    cors = [1.0, 0.8, 0.4, 0.0, -0.4, -0.8, -1.0]
    for i, cor in enumerate(cors):
        cov = [[1, cor], [cor, 1]]
        xy = np.random.multivariate_normal([0, 0], cov, n)
        mysubplot(xy[:, 0], xy[:, 1], 3, 7, i + 1)


def rotnormal(n=1000):
    """Fill grid cells 8-14 with one sample set rotated by seven angles.

    A single draw of ``n`` points with covariance ``[[1, 1], [1, 1]]`` is
    rotated by angles from 0 to pi/2 and plotted in the second row of a
    3x7 grid.
    """
    ts = [0, np.pi/12, np.pi/6, np.pi/4, np.pi/2 - np.pi/6,
          np.pi/2 - np.pi/12, np.pi/2]
    cov = [[1, 1], [1, 1]]
    xy = np.random.multivariate_normal([0, 0], cov, n)
    for i, t in enumerate(ts):
        xy_r = rotation(xy, t)
        mysubplot(xy_r[:, 0], xy_r[:, 1], 3, 7, i + 8)


def others(n=1000):
    """Fill grid cells 15-21 with seven non-linear / structured data sets.

    All panels are derived from (or drawn alongside) one uniform sample
    ``x`` on [-1, 1]; the statement order matters because later panels
    reuse ``x`` and ``xy`` from earlier ones.
    """
    x = np.random.uniform(-1, 1, n)

    # Cell 15: quartic curve plus uniform noise.
    y = 4*(x**2 - 0.5)**2 + np.random.uniform(-1, 1, n)/3
    mysubplot(x, y, 3, 7, 15, (-1, 1), (-1/3, 1 + 1/3))

    # Cell 16: uniform square passed through rotation(..., -pi/8).
    y = np.random.uniform(-1, 1, n)
    xy = np.concatenate((x.reshape(-1, 1), y.reshape(-1, 1)), axis=1)
    xy = rotation(xy, -np.pi/8)
    lim = np.sqrt(2 + np.sqrt(2)) / np.sqrt(2)
    mysubplot(xy[:, 0], xy[:, 1], 3, 7, 16, (-lim, lim), (-lim, lim))

    # Cell 17: the same square rotated by a further -pi/8.
    xy = rotation(xy, -np.pi/8)
    lim = np.sqrt(2)
    mysubplot(xy[:, 0], xy[:, 1], 3, 7, 17, (-lim, lim), (-lim, lim))

    # Cell 18: parabola plus uniform noise.
    y = 2*x**2 + np.random.uniform(-1, 1, n)
    mysubplot(x, y, 3, 7, 18, (-1, 1), (-1, 3))

    # Cell 19: noisy parabola with a random sign per point.
    # randint(0, 2) draws from {0, 1}; it replaces the deprecated
    # np.random.random_integers(0, 1), which had an inclusive upper bound.
    signs = np.array([-1, 1])[np.random.randint(0, 2, size=n)]
    y = (x**2 + np.random.uniform(0, 0.5, n)) * signs
    mysubplot(x, y, 3, 7, 19, (-1.5, 1.5), (-1.5, 1.5))

    # Cell 20: noisy circle. y must be computed before x is overwritten.
    y = np.cos(x * np.pi) + np.random.uniform(0, 1/8, n)
    x = np.sin(x * np.pi) + np.random.uniform(0, 1/8, n)
    mysubplot(x, y, 3, 7, 20, (-1.5, 1.5), (-1.5, 1.5))

    # Cell 21: four unit-covariance Gaussian clusters, n // 4 points each.
    quarter = n // 4
    identity = [[1, 0], [0, 1]]
    centers = [[3, 3], [-3, 3], [-3, -3], [3, -3]]
    clusters = [np.random.multivariate_normal(center, identity, quarter)
                for center in centers]
    xy = np.concatenate(clusters, axis=0)
    mysubplot(xy[:, 0], xy[:, 1], 3, 7, 21, (-7, 7), (-7, 7))


plt.figure(facecolor='white')
mvnormal(n=800)
rotnormal(n=200)
others(n=800)

# Set the figure title before tight_layout so that Matplotlib versions whose
# tight_layout accounts for the suptitle can reserve room for it.
plt.suptitle('Understand the difference of correlation index (Pearson VS MIC)')
plt.tight_layout()
plt.show()