一、背景
这两天在努力记单词,想着应该把最常使用的单词先记下来,从网上找了几篇文章之后分析了一批词汇,效果还算不错;
接着又想到了代码,也好奇开发者最常使用的单词或函数有哪些,于是统计了三种类型:系统函数、变量名、自定义函数名。感兴趣的朋友可以将正则设置为自己需要统计的规则。
二、参考代码
<?php
/**
 * Word-frequency counter for PHP source trees.
 *
 * Walks a directory recursively, collects every ".php" file, applies a
 * regular expression to each file and counts how often every captured
 * keyword (capture group 1) occurs. The 100 most frequent keywords are
 * written to "countResult.txt"; the list of scanned files is written to
 * "fileList.txt". Both files are created in the current working directory.
 *
 * Intermediate state is kept in $_SERVER['fileList'] and $_SERVER['count'].
 */

/**
 * Recursively collect the paths of all ".php" files below a directory.
 *
 * Every match is appended to $_SERVER['fileList'] as "<directory>/<name>".
 * The entries ".", ".." and ".git" are skipped.
 *
 * @param string $path Directory to scan; trailing slashes are stripped.
 * @return void
 */
function scanMyDir($path)
{
    $path = rtrim($path, '/');

    if (!is_dir($path)) {
        trigger_error("scanMyDir: '{$path}' is not a readable directory", E_USER_WARNING);
        return;
    }

    $dh = opendir($path);
    if ($dh === false) {
        // opendir() has already raised a warning describing the failure.
        return;
    }

    try {
        while (($file = readdir($dh)) !== false) {
            // Skip the current/parent directory entries and the git metadata.
            if ($file === '.' || $file === '..' || $file === '.git') {
                continue;
            }

            $fullPath = $path . '/' . $file;

            if (is_dir($fullPath)) {
                // Sub-directory: descend into it.
                scanMyDir($fullPath);
                continue;
            }

            // Compare the real extension: a substring search would miss
            // one-letter names such as "a.php" and accept "x.php.bak".
            if (pathinfo($file, PATHINFO_EXTENSION) === 'php') {
                // Store the path exactly as the filesystem reports it; it is
                // opened again later, so it must not be re-encoded.
                $_SERVER['fileList'][] = $fullPath;
            }
        }
    } finally {
        // Release the handle even if a nested scan throws.
        closedir($dh);
    }
}

/**
 * Count the keywords one file contributes and add them to the running tally.
 *
 * Each string captured by group 1 of $pregRule increments
 * $_SERVER['count'][<keyword>] by one.
 *
 * @param string $fileName Path of the file to analyse.
 * @param string $pregRule PCRE pattern; must contain at least one capture group.
 * @return void
 */
function explodeCount($fileName, $pregRule)
{
    if (!is_string($fileName) || $fileName === '') {
        return;
    }

    $text = file_get_contents($fileName);
    if ($text === false) {
        // Unreadable file: file_get_contents() already warned, skip it.
        return;
    }

    $keywordArr = [];
    if (preg_match_all($pregRule, $text, $keywordArr) === false) {
        trigger_error("explodeCount: pattern failed on '{$fileName}'", E_USER_WARNING);
        return;
    }

    if (!isset($keywordArr[1])) {
        trigger_error('explodeCount: pattern must contain a capture group', E_USER_WARNING);
        return;
    }

    foreach ($keywordArr[1] as $keyword) {
        $_SERVER['count'][$keyword] = ($_SERVER['count'][$keyword] ?? 0) + 1;
    }
}

/**
 * Scan a directory tree and write the keyword statistics to disk.
 *
 * Writes "fileList.txt" (one scanned path per line) and "countResult.txt"
 * (var_export() of the 100 most frequent keywords, most frequent first).
 *
 * @param string $path     Root directory of the code to analyse.
 * @param string $pregRule PCRE pattern whose capture group 1 is the keyword.
 * @return void
 */
function start($path, $pregRule)
{
    // Make sure both accumulators exist so an empty scan does not fail below.
    $_SERVER['fileList'] = $_SERVER['fileList'] ?? [];
    $_SERVER['count'] = $_SERVER['count'] ?? [];

    scanMyDir($path);

    // Record which files were analysed, for later inspection.
    file_put_contents('fileList.txt', implode("\n", $_SERVER['fileList']));

    // Iterate the in-memory list: re-reading the text file would turn an
    // empty list into [''] and would split paths that contain a newline.
    foreach ($_SERVER['fileList'] as $fileName) {
        explodeCount($fileName, $pregRule);
    }

    // Most frequent first, then keep the top 100. preserve_keys stops
    // array_slice() from renumbering keywords that look like integers.
    arsort($_SERVER['count']);
    $_SERVER['count'] = array_slice($_SERVER['count'], 0, 100, true);

    file_put_contents('countResult.txt', var_export($_SERVER['count'], true));
}

// Directory containing the code to analyse.
$path = '/root/mycode/work/offcn-live/vendor';

// Alternative patterns:
//   built-in function calls: '/ ([a-z]+_?[a-z]+)\(/'
//   variable names:          '/(\$[a-zA-Z]+_?[a-zA-Z]+)/'
// Active pattern: method calls (names following "->" or "::").
$pregRule = '/[->:]+([a-z]+_?[a-z]+)\(/';

start($path, $pregRule);
三、常用函数
array ( 'array' => 6126, 'isset' => 1345, 'substr' => 845, 'sprintf' => 737, 'strlen' => 650, 'count' => 598, 'unset' => 556, 'array_merge' => 449, 'list' => 413, 'strpos' => 408, 'str_replace' => 393, 'implode' => 348, 'explode' => 333, 'is_array' => 332, 'static' => 297, 'trim' => 263, 'declare' => 238, 'mock' => 237, 'pack' => 232, 'preg_match' => 222, 'is_null' => 210, 'get_class' => 203, 'array_map' => 195, 'self' => 191, 'strtolower' => 190, 'empty' => 183, 'preg_replace' => 180, 'chr' => 169, 'function_exists' => 163, 'user_error' => 161, 'handle' => 158, 'is_string' => 155, 'is_object' => 140, 'str_repeat' => 139, 'array_keys' => 138, 'rewind' => 137, 'in_array' => 133, 'write' => 132, 'mt_rand' => 132, 'array_values' => 129, 'time' => 125, 'not' => 124, 'array_shift' => 124, 'extract' => 120, 'getenv' => 115, 'reset' => 113, 'execute' => 112, 'printf' => 110, 'fopen' => 108, 'get' => 105, 'collect' => 100, 'current' => 100, 'fclose' => 99, 'unpack' => 96, 'strval' => 96, 'matches' => 92, 'rtrim' => 90, 'str_pad' => 88, 'json_encode' => 88, 'array_filter' => 88, 'array_pop' => 85, 'app' => 84, 'range' => 84, 'dirname' => 83, 'define' => 81, 'microtime' => 80, 'foo' => 80, 'create' => 80, 'ord' => 80, 'compact' => 79, 'read' => 77, 'method_exists' => 76, 'register' => 75, 'realpath' => 74, 'intval' => 73, 'bar' => 73, 'strtotime' => 73, 'fread' => 72, 'class_exists' => 72, 'print' => 72, 'max' => 72, 'curl_setopt' => 70, 'fwrite' => 69, 'tap' => 66, 'strtoupper' => 65, 'array_unshift' => 65, 'serialize' => 64, 'ob_start' => 64, 'unserialize' => 63, 'strrpos' => 61, 'key' => 61, 'preg_split' => 61, 'ini_get' => 61, 'add' => 59, 'close' => 59, 'array_slice' => 58, 'putenv' => 57, 'eval' => 57, 'gettype' => 56, 'var_export' => 56, )
四、常用变量名
array ( '$this' => 75572, '$value' => 6303, '$options' => 4731, '$key' => 4597, '$name' => 4367, '$vendorDir' => 4310, '$message' => 4115, '$request' => 3453, '$stackPos' => 3237, '$response' => 2796, '$result' => 2577, '$data' => 2308, '$path' => 2117, '$node' => 1733, '$type' => 1650, '$method' => 1620, '$file' => 1449, '$arguments' => 1415, '$class' => 1408, '$callback' => 1378, '$output' => 1364, '$command' => 1314, '$parameters' => 1273, '$config' => 1252, '$expected' => 1197, '$column' => 1153, '$input' => 1140, '$id' => 1119, '$headers' => 1083, '$event' => 1083, '$args' => 986, '$attributes' => 979, '$length' => 961, '$code' => 950, '$query' => 947, '$prefix' => 947, '$mock' => 930, '$token' => 925, '$context' => 909, '$test' => 892, '$temp' => 884, '$header' => 871, '$matches' => 847, '$object' => 825, '$string' => 813, '$container' => 810, '$server' => 810, '$stream' => 768, '$collection' => 768, '$route' => 761, '$values' => 761, '$record' => 748, '$exception' => 748, '$actual' => 719, '$connection' => 712, '$item' => 697, '$constraint' => 670, '$operation' => 666, '$date' => 655, '$bucket' => 648, '$array' => 644, '$line' => 643, '$count' => 641, '$uri' => 622, '$buf' => 618, '$handler' => 608, '$default' => 598, '$table' => 594, '$content' => 578, '$reader' => 558, '$resource' => 549, '$application' => 549, '$tokens' => 541, '$locale' => 539, '$attribute' => 531, '$format' => 518, '$filename' => 510, '$className' => 509, '$str' => 505, '$parts' => 505, '$matcher' => 499, '$text' => 498, '$queue' => 483, '$generator' => 480, '$filter' => 476, '$client' => 475, '$level' => 468, '$domain' => 467, '$writer' => 464, '$argument' => 460, '$number' => 459, '$option' => 452, '$payload' => 448, '$keys' => 445, '$process' => 444, '$translator' => 437, '$app' => 435, '$listener' => 430, '$files' => 429, '$index' => 422, )
五、常用自定义函数
array ( 'once' => 1292, 'with' => 1105, 'get' => 997, 'expects' => 700, 'method' => 651, 'set' => 612, 'create' => 600, 'add' => 588, 'foo' => 464, 'format' => 434, 'write' => 429, 'execute' => 421, 'all' => 378, 'evaluate' => 344, 'has' => 320, 'register' => 318, 'fail' => 294, 'find' => 286, 'run' => 284, 'any' => 280, 'start' => 254, 'parse' => 233, 'load' => 203, 'make' => 200, 'read' => 191, 'generate' => 185, 'factory' => 182, 'close' => 164, 'current' => 155, 'render' => 152, 'ask' => 149, 'numerify' => 146, 'will' => 145, 'where' => 137, 'singleton' => 133, 'writeln' => 128, 'valid' => 125, 'next' => 124, 'main' => 122, 'send' => 121, 'trans' => 117, 'request' => 116, 'option' => 113, 'handle' => 112, 'matches' => 111, 'match' => 110, 'contains' => 103, 'write_shortstr' => 103, 'process' => 101, 'never' => 100, 'at' => 99, 'initialize' => 97, 'rewind' => 96, 'bind' => 92, 'validate' => 90, 'dispatch' => 88, 'filter' => 86, 'in' => 86, 'copy' => 85, 'verify' => 84, 'delete' => 83, 'wrap' => 81, 'put' => 79, 'stop' => 78, 'mock' => 78, 'dump' => 78, 'supports' => 78, 'observe' => 77, 'encrypt' => 77, 'attach' => 75, 'first' => 74, 'apply' => 73, 'remove' => 72, 'invoke' => 72, 'connection' => 71, 'advance' => 69, 'decrypt' => 69, 'ordered' => 69, 'save' => 68, 'resolve' => 68, 'prepare' => 67, 'println' => 67, 'auth' => 65, 'reset' => 65, 'bar' => 64, 'write_short' => 64, 'call' => 63, 'map' => 63, 'compare' => 63, 'string' => 62, 'log' => 62, 'wait' => 61, 'info' => 61, 'update' => 60, 'escape' => 60, 'lookup' => 58, 'write_bits' => 57, 'count' => 57, 'push' => 56, 'times' => 55, )
后面的数字代表该词在代码中出现的次数。我用的是项目的vendor目录,里面都是一些比较常用的开源代码库,所以应该算是比较有参考价值。