面向对象程序设计课程设计:利用决策树方法判定西瓜质量(二)

简介: 面向对象程序设计课程设计:利用决策树方法判定西瓜质量

第三章 总结与感悟

3.1程序设计中的不足之处

3.2对答辩中问题的解答

3.3感悟

附录:部分核心代码(完整代码请移步下载区)

treenode.h

#ifndef TREENODE_H
#define TREENODE_H
#endif // TREENODE_H
#pragma once
#include<map>
#include<vector>
#include<string>
#include<iostream>
#include<fstream>
//#include"Tree.h"
using namespace std;
class TreeNode
{
public:
    void generate_childNode(double data[18][10],int size);
    void splitdata();
    double log2x(double x);
    int findIndexOfAttribute();
    //计算信息增益最大值所对应的属性的位置
    double calcEntropy(int total_size,int true_size);
    double calc_gain(int kind);
    //参数为属性的序号值
    bool stop(double temp_data[18][10], int size);
    TreeNode()
    {
        Continuouspoint[2] = { 0 };
        breakingpoint = 0;
        entD = 0;
        indexOfAttribute = 0;
        isContinuous = 0;
        isGood = 0;
        isLeafNode = 0;
        rem_wat[18][10] = { 0 };
        sizeOfWatermelons = { 0 };
        attSize = 9;
        for (int i = 0; i < 9; i++)
            rem_att[i] = i;
    }
    void setNodeAttribute(int indexOf)
    {
        this->indexOfAttribute = indexOf;
    }
    virtual ~TreeNode()
    {
    }
    TreeNode(double Data[18][10] , int n,int atts[10],int att_size)
    {
        isLeafNode = false;
        isContinuous = false;
        breakingpoint = 0;
        isGood = false;
        indexOfAttribute = 0;
        sizeOfWatermelons= n;
        childTree.clear();
        attSize = att_size;
        for (int i = 0; i < att_size; i++)
            rem_att[i] = atts[i];
        for (int i = 0; i < sizeOfWatermelons; i++)
        {
            for (int j = 0; j < 10; j++)
                rem_wat[i][j] = Data[i][j];
        }
        //计算未划分时的信息熵
        int RootT = 0; //未划分时的正例
        for (int i = 0; i < sizeOfWatermelons; i++)//统计正例个数
            if (rem_wat[i][9] == 1)
                RootT++;
        entD =calcEntropy(sizeOfWatermelons, RootT);
    }
    void deleteatt(int index)
    {
        int i;
        for ( i = 0; i < attSize; i++)
        {
            if (rem_att[i] == index)
                break;
        }
        for (int j = i; j < attSize - 1; j++)
            rem_att[j] = rem_att[j + 1];
        attSize--;
    }
    bool judgeByTree(double data[10]);
    void WriteGain();
private:
    static int attribute_size[10];
    double Continuouspoint[2];
    int indexOfAttribute;
    //当前节点的属性对应的位置
    double rem_wat[18][10];
    //剩下的没分完的西瓜的
    int sizeOfWatermelons;
    //剩下的西瓜的个数
    bool isLeafNode;
    //是否为叶子结点
    bool isContinuous;
    //是否为连续值
    double breakingpoint;
    //若是连续值,则连续值的间断点
    vector<TreeNode> childTree;
    //若不是叶结点,则子结点的地址
    bool isGood;
    //若是叶结点,则是否为好瓜
    double entD;
    //初始信息熵
    int rem_att[10];
    //剩余的属性
    int attSize;
    //余下属性的序号
};

treenode.cpp

#pragma once
#include "TreeNode.h"
#include<vector>
#include<Windows.h>
#include<iostream>
#include<fstream>
using namespace std;
int TreeNode::attribute_size[10] = { -1,3,3,3,3,3,2,-1,-1,2 };//每个属性分别对应着几种取值
bool TreeNode::stop(double temp_data[18][10], int size)
{
    //如果所有数据属于同一类,则停止分类
    int  sumT = 0;
    for (int i = 0; i < size; i++)
        if (temp_data[i][9] == 1)
            sumT++;
    if (sumT == 0)
        return true;
    else if (sumT == size)
        return true;
    else
        return false;
}
double TreeNode::log2x(double x)
{
    double den = log10(2), num = log10(x), sum;
    sum = num / den;
    return sum;
}
double TreeNode::calcEntropy(int total_size, int true_size)
{
    //所有实例数量、正例数量
    double entropy, Texample, Fexample, Tproportion, Fproportion;//信息熵;正例信息熵;反例信息熵;正例占比;反例占比
    Tproportion = true_size * 1.0 / total_size;
    Fproportion = (total_size - true_size) * 1.0 / total_size;
    if (Fproportion == 0)
    {
        entropy = (-1) * Tproportion * log2x(Tproportion);
        return entropy;
    }
    else if (Tproportion == 0)
    {
        entropy = (-1) * Fproportion * log2x(Fproportion);
        return entropy;
    }
    else
    {
        Texample = Tproportion * log2x(Tproportion);
        Fexample = Fproportion * log2x(Fproportion);
        entropy = -Texample - Fexample;
        return entropy;
    }
}
double TreeNode::calc_gain(int kind)
{
    this->isContinuous = false;
    this->breakingpoint = 0;
    if (kind == 1 || kind == 2 || kind == 3 || kind == 4 || kind == 5 || kind == 6)//具有离散值的属性
    {
        //遍历,用于计算该节点的类别个数
        int ZeroKind = 0, OneKind = 0, TwoKind = 0;
        for (int i = 0; i < sizeOfWatermelons; i++)
        {
            if (rem_wat[i][kind] == 0)
                ZeroKind++;
            else if (rem_wat[i][kind] == 1)
                OneKind++;
            else
                TwoKind++;
        }
        if (ZeroKind != 0 && OneKind != 0 && TwoKind != 0)//各属性中具有三个类别的信息增益
        {
            //统计各类别的总数、正例数量
            int OneTrue = 0, TwoTrue = 0, ZeroTrue = 0, OneSum = 0, TwoSum = 0, ZeroSum = 0;
            for (int i = 0; i < sizeOfWatermelons; i++)
            {
                if (rem_wat[i][9] == 1)//好瓜
                {
                    if (rem_wat[i][kind] == 0)
                        ZeroTrue++;
                    else if (rem_wat[i][kind] == 1)
                        OneTrue++;
                    else
                        TwoTrue++;
                }
                else//不是好瓜
                {
                    if (rem_wat[i][kind] == 0)
                        ZeroSum++;
                    else if (rem_wat[i][kind] == 1)
                        OneSum++;
                    else
                        TwoSum++;
                }
            }
            ZeroSum += ZeroTrue; OneSum += OneTrue; TwoSum += TwoTrue;
            //三种信息熵
            double ZeroEntropy, OneEntropy, TwoEntropy;
            ZeroEntropy = calcEntropy(ZeroSum, ZeroTrue);
            OneEntropy = calcEntropy(OneSum, OneTrue);
            TwoEntropy = calcEntropy(TwoSum, TwoTrue);
            //信息增益
            double gain = entD - ZeroSum * ZeroEntropy * 1.0 / sizeOfWatermelons - OneSum * OneEntropy * 1.0 / sizeOfWatermelons - TwoSum * TwoEntropy * 1.0 / sizeOfWatermelons;
            return gain;
        }
        else if ((ZeroKind != 0 && OneKind != 0 && TwoKind == 0) || (ZeroKind == 0 && OneKind != 0 && TwoKind != 0) || (ZeroKind != 0 && OneKind == 0 && TwoKind != 0)) //各属性中具有两个类别的信息增益
        {//统计各类别的总数、正例数量
            int OneTrue = 0, ZeroTrue = 0, OneSum = 0, ZeroSum = 0;
            for (int i = 0; i < sizeOfWatermelons; i++)
            {
                if (rem_wat[i][9] == 1)//好瓜
                {
                    if (rem_wat[i][kind] == 0)
                        ZeroTrue++;
                    else
                        OneTrue++;
                }
                else//不是好瓜
                {
                    if (rem_wat[i][kind] == 0)
                        ZeroSum++;
                    else
                        OneSum++;
                }
            }
            ZeroSum += ZeroTrue; OneSum += OneTrue;
            //两种信息熵
            double ZeroEntropy, OneEntropy;
            ZeroEntropy = calcEntropy(ZeroSum, ZeroTrue);
            OneEntropy = calcEntropy(OneSum, OneTrue);
            //信息增益
            double gain = entD - ZeroSum * ZeroEntropy * 1.0 / sizeOfWatermelons - OneSum * OneEntropy * 1.0 / sizeOfWatermelons;
            return gain;
        }
        else//属性里只有一个类别,信息熵是0;信息增益max=entD但不可以按照该类别划分,按0处理
        {
            return entD;
        }
    }
    else //连续值属性
    {//arrange存储连续属性数据,cQuality储存对应的好瓜判断,
        double* arrange = new double[sizeOfWatermelons];
        double* cQuality = new double[sizeOfWatermelons];
        for (int i = 0; i < sizeOfWatermelons; i++)
        {
            arrange[i] = rem_wat[i][kind];
            cQuality[i] = rem_wat[i][9];
        }
        //从小到大排序
        double tempt, t;
        for (int m = 0; m < sizeOfWatermelons - 1; m++)
        {
            int min = m;
            for (int i = m + 1; i < sizeOfWatermelons; i++)
                if (arrange[i] < arrange[min])
                    min = i;
            //交换当前点和最小值点
            tempt = arrange[min];
            arrange[min] = arrange[m];
            arrange[m] = tempt;
            //交换当前点的好坏瓜和最小值点的好坏瓜
            t = cQuality[min];
            cQuality[min] = cQuality[m];
            cQuality[m] = t;
        }
        //按照算法取端点值存储到point数组,二分法
        double* point = new double[sizeOfWatermelons - 1];
        for (int i = 0; i < sizeOfWatermelons - 1; i++)
            point[i] = (arrange[i] + arrange[i + 1]) / 2;
        //计算不同端点处的信息增益并储存到PointGain数组中
        int i;//端点下标
        double* PointGain = new double[sizeOfWatermelons - 1];
        double BeforeEntropy, AfterEntropy;
        //以端点为界限分两组,并统计两组的正例
        for (i = 0; i < sizeOfWatermelons - 1; i++)
        {
            int BeforeTrue = 0, AfterTrue = 0;
            for (int m = 0; m < i + 1; m++)
                if (cQuality[m] == 1)
                    BeforeTrue++;
            for (int m = i + 1; m < sizeOfWatermelons; m++)
                if (cQuality[m] == 1)
                    AfterTrue++;
            //两种信息熵,记录端点左边的和端点右边的
            BeforeEntropy = calcEntropy(i + 1, BeforeTrue);
            AfterEntropy = calcEntropy(sizeOfWatermelons - 1 - i, AfterTrue);
            //信息增益
            PointGain[i] = entD - (i + 1) * BeforeEntropy * 1.0 / sizeOfWatermelons - (sizeOfWatermelons - 1 - i) * AfterEntropy * 1.0 / sizeOfWatermelons;
        }
        int max = 0;
        for (int i = 1; i < sizeOfWatermelons - 1; i++)
            if (PointGain[i] > PointGain[max])
                max = i;
        this->Continuouspoint[kind-7] = point[max];
//    cout << "continue=" << Continuouspoint[kind - 7]<<endl;
        this->breakingpoint = this->Continuouspoint[kind - 7];
        double gain = PointGain[max];
        delete[]PointGain;
        delete[]point;
        delete[]arrange;
        delete[]cQuality;
        return gain;
    }
}
void TreeNode::generate_childNode(double data[18][10],int size)
{
    if ((this->stop(data, size)) || (size == 0) || this->isLeafNode)
    {
        this->isLeafNode = true;
        this->isGood = bool(data[0][9]);
//    cout <<bool( isGood);
        return;
    }
    else
    {
        //从A中选择最优划分属性a*
        this->isLeafNode = false;
        this->indexOfAttribute = 0;
        double maxgain = 0;
        for (int i = 0; i < attSize; i++)
        {
            if (this->calc_gain(rem_att[i]) > maxgain)
            {
                indexOfAttribute = rem_att[i];
                maxgain = this->calc_gain(rem_att[i]);
            }
        }
//    cout << indexOfAttribute;
        this->deleteatt(indexOfAttribute);
    //  cout << endl;
        //生成分支
        if ((indexOfAttribute >= 1) && (indexOfAttribute <= 6))
        {
            isContinuous = 0;
            double temp_data[18][10];
            int sizeOfTemp_data = 0;
            for (int i = 0; i < attribute_size[indexOfAttribute]; i++)
            {
                sizeOfTemp_data = 0;
                for (int j = 0; j < sizeOfWatermelons; j++)
                {
                    if (data[j][indexOfAttribute] == i)//复制西瓜,归类
                    {
                        for (int k = 0; k < 10; k++)
                            temp_data[sizeOfTemp_data][k] = data[j][k];
                        sizeOfTemp_data++;
                    }
                }
                TreeNode tempTree(temp_data, sizeOfTemp_data,rem_att,attSize);
                if (sizeOfTemp_data == 0)
                {
                    tempTree.isLeafNode = true;
                    tempTree.isGood = bool(data[0][10]);
                    return;
                }
                tempTree.generate_childNode(temp_data,sizeOfTemp_data);
                childTree.push_back(tempTree);
            }
            return;
        }
        if (indexOfAttribute == 7 || indexOfAttribute == 8)
        {
            isContinuous = 1;
            double temp_data1[18][10];
            double temp_data2[18][10];
            int sizeOfTemp_data = 0;
            breakingpoint = Continuouspoint[indexOfAttribute - 7];
//      cout <<"breakingpoint="<< this->breakingpoint<<endl;
            for (int i = 0; i < sizeOfWatermelons; i++)
            {
                if (data[i][indexOfAttribute] <= breakingpoint)
                {
                    for (int k = 0; k < 10; k++)
                        temp_data1[sizeOfTemp_data][k] = data[i][k];
                    sizeOfTemp_data++;
                }
            }
            TreeNode tempTree1(temp_data1, sizeOfTemp_data, rem_att, attSize);
            if (sizeOfTemp_data == 0)
            {
                tempTree1.isLeafNode = true;
                tempTree1.isGood = bool(data[0][10]);
                return;
            }
            tempTree1.generate_childNode(temp_data1, sizeOfTemp_data);
            childTree.push_back(tempTree1);
            for (int i = 0; i < sizeOfWatermelons; i++)
                if (data[i][indexOfAttribute] > breakingpoint)
                {
                    for (int k = 0; k < 10; k++)
                        temp_data2[sizeOfTemp_data][k] = false;
                    sizeOfTemp_data++;
                }
            TreeNode tempTree2(temp_data2, sizeOfTemp_data, rem_att, attSize);
            if (sizeOfTemp_data == 0)
            {
                tempTree2.isLeafNode = true;
                tempTree2.isGood = bool(data[0][10]);
                return;
            }
            tempTree2.generate_childNode(temp_data2, sizeOfTemp_data);
            childTree.push_back(tempTree2);
            }
    }
        return;
}
bool TreeNode::judgeByTree(double data[10])
{
    if (isLeafNode)
        return isGood;//如果这个结点就是叶结点,则叶结点的isgood属性记录了这个西瓜是好是坏,返回
    else//这个结点不是叶结点
    {
        int i = 0;//访问哪个子结点
        int index;//第几项属性
        index = indexOfAttribute;
        //处理判断离散值,为0则进入第0个子结点,为1则进入第1个子结点……
        if ((index >= 1) && (index <= 6))
        {
            //attribute_size表示这种属性有几种取值
            for (i = 0; i < attribute_size[index]; i++)
            {
                if (data[index] == i)//找到西瓜的第index项属性对应的值
                    break;
            }
        }
        //判断连续值,小于间断点则进第0个子结点,大于间断点则进第1个子结点
        if ((index == 7) || (index == 8))
        {
            if (data[index] < breakingpoint)
                i = 0;
            else
                i = 1;
        }
        //交给子决策树继续判断西瓜的好坏
        return childTree[i].judgeByTree(data);
    }
}
void TreeNode::WriteGain()
{
    //计算各属性的信息增益
    double gain[9];
    for (int i = 1; i < 9; i++)
        gain[i] = calc_gain(i);
    //写入文件
    fstream binfile;
    binfile.open("d:\\a\\Gain.txt", ios::binary | ios::out | ios::ate);
    if (!binfile)
    {
        cerr << "Tdata.csv open or create error!" << endl;
        exit(1);
    }
    for (int i = 1; i < 9; i++)//输出
        binfile.write((char *)&gain[i], sizeof(double));
    for (int i = 1; i < 9; i++)//输出
        cout<<gain[i]<<" ";
    cout<<endl;
    binfile.close();
}

main.cpp

#include<iostream>
#include<fstream>
#include<math.h>
#include<iomanip>
#include<cstring>
#include<time.h>
#include<random>
#include<stdlib.h>
#include "treenode.h"
double ListData[50000][10];//50000个瓜;9:0ID,1色泽,2根蒂,3敲声,4纹理,5脐部,6触感,7密度,8含糖率,9好瓜[1好瓜]
int main(int argc, char *argv[])
{
    clock_t start, end;
    start = clock();
        ifstream bin;
        double data[17][10];
        bin.open("D:\\a\\watermelon.csv", ios::binary);
        if (!bin)
        {
            cerr << "data.csv open or create error!" << endl;
            exit(1);
        }
        for (int i = 0; i < 17; i++)
            for (int k = 0; k < 10; k++)
                bin.read((char*)&data[i][k], sizeof(double));
        bin.close();
     /*   cout << "原始数据:" << endl;
        for (int i = 0; i < 17; i++)
        {
            for (int j = 0; j < 10; j++)
                cout << data[i][j] << " ";
            cout << endl;
        }*/
        for (int i = 0; i < 50000; i++)
        {
            ListData[i][0] = i + 1;
            for (int k = 1; k < 6; k++)
                ListData[i][k] = rand() % 3;
            ListData[i][6] = rand() % 2;
            ListData[i][7] = rand() % 1001;
            ListData[i][7] /= 1000;
            ListData[i][8] = rand() % 1001;
            ListData[i][8] /= 1000;
            ListData[i][9] = rand() & 1;
        }
        fstream randomfile;
        randomfile.open("D:\\a\\data.csv", ios::binary | ios::out | ios::trunc);
        if (!randomfile)
        {
            cerr << "Tdata.csv open or create error!" << endl;
            exit(1);
        }
        for (int i = 0; i <50000 ; i++)//输出
            for (int k = 0; k < 9; k++)
                randomfile.write((char*)&ListData[i][k], sizeof(double));
        randomfile.close();
        int rem_att[8] = { 1,2,3,4,5,6,7,8};//开局剩下8个属性没分,从色泽到含糖率
        TreeNode myTree(data, 17,rem_att,8);
        myTree.WriteGain();
 //       system("pause");
        myTree.generate_childNode(data, 17);
        for (int i = 0; i < 50000; i++)
            if (myTree.judgeByTree(ListData[i]))
                ListData[i][9] = 1;
            else
                ListData[i][9] = 0;
      /*  cout << "测试结果:"<<endl;*/
        fstream binfile;
        binfile.open("D:\\a\\result.csv", ios::out | ios::trunc);
        if (!binfile)
        {
            cerr << "Tdata.csv open or create error!" << endl;
            exit(1);
        }
        for (int i = 0; i <50000 ; i++)//输出
            for (int k = 0; k < 10; k++)
                binfile.write((char*)&ListData[i][k], sizeof(double));
        binfile.close();
        for (int i = 0; i < 50000; i++)
        {
            for (int j = 0; j < 10; j++)
                cout << ListData[i][j] << " ";
            cout << endl;
        }
        for(i=0;i<50000;i++) judgebytree(data[i]);
        /* 创建表格视图 */
        /* 显示 */
        end = clock();
        cout << "time = " << double(end - start) / CLOCKS_PER_SEC << "s" << endl;
}
目录
相关文章
|
14天前
|
算法
【软件设计师】常见的算法设计方法——穷举搜索法
【软件设计师】常见的算法设计方法——穷举搜索法
|
21天前
|
资源调度
回归方程优良性评价(原理+实践+代码)
回归方程优良性评价(原理+实践+代码)
回归方程优良性评价(原理+实践+代码)
|
21天前
|
机器学习/深度学习 存储 供应链
【软件设计师备考 专题 】运算基本方法:预测与决策、线性规划、网络图、模拟
【软件设计师备考 专题 】运算基本方法:预测与决策、线性规划、网络图、模拟
67 0
|
存储 人工智能 安全
Java面向对象程序设计综合练习3(选择题)
Java面向对象程序设计综合练习3(选择题)
373 0
|
12月前
离散数学笔记_第一章:逻辑和证明(3)
离散数学笔记_第一章:逻辑和证明(3)
133 0
|
12月前
|
自然语言处理 索引
离散数学笔记_第一章:逻辑和证明(2 )
离散数学笔记_第一章:逻辑和证明(2 )
93 0
|
机器学习/深度学习 算法
数据结构与算法关系(中):如何评判一个算法的好坏
大家好,我是MicroStone,一个曾在三家世界500强企业担任要职的一线互联网工程师。上一节,我们了解到算法的一些特征,想必大家都掌握了算法设计要求,在学习或工作中根据业务需求设计要设计一个算法,我们要如何评估一个算法的好坏呐?下面我们来看看算法的度量方式。
134 0
|
机器学习/深度学习 存储 算法
面向对象程序设计课程设计:利用决策树方法判定西瓜质量(一)
面向对象程序设计课程设计:利用决策树方法判定西瓜质量
156 0
面向对象程序设计课程设计:利用决策树方法判定西瓜质量(一)
|
Java
Java面向对象程序设计综合练习3(判断题)
Java面向对象程序设计综合练习3(判断题)
111 0
|
SQL Java 数据库连接
Java面向对象程序设计综合练习4(判断题)
Java面向对象程序设计综合练习4(判断题)
107 0

热门文章

最新文章