1.Awk数组概述
1.什么是awk数组
数组其实也算是变量, 传统的变量只能存储一个值,但数组可以存储多个值。
2.awk数组应用场景
通常用来统计、比如:统计网站访问TOP10、网站Url访问TOP10等等等
3.awk数组统计技巧
1.在awk中,使用数组时,不仅可以使用1 2 3 ..n作为数组索引,也可以使用字符串作为数组索引。
2.要统计某个字段的值,就将该字段作为数组的索引,然后对索引进行遍历。
4.awk数组的语法
array_name[index]=value
5.awk数组示例
1.统计/etc/passwd中各种类型shell的数量
[root@test ~]# awk -F ":" '{a[$NF]++}END{for (i in a){printf "%-20s%s\n",i,a[i]}}' /etc/passwd
/bin/sync 1
/bin/bash 3
/sbin/nologin 21
/sbin/halt 1
/sbin/shutdown 1
或者写到文件里
[root@test ~/2021-07-02]# cat jishu.awk
BEGIN{
FS=":"
}
赋值操作(因为awk是一行一行读入的,相当是循环了整个文件中的内容)
{
hosts[$NF]++
}
赋值完成后,需要通过循环的方式将其索引的次数遍历出来
END {
for (item in hosts) {
print item,
hosts[item]
}
}
[root@test ~/2021-07-02]# awk -f jishu.awk /etc/passwd
/bin/sync 1
/bin/bash 3
/sbin/nologin 21
/sbin/halt 1
/sbin/shutdown 1
2.统计主机上所有的tcp链接状态数,按照每个tcp状态分类
[root@test ~]# netstat -an | grep tcp | awk '{arr[$6]++}END{for (i in arr) print i,arr[i]}'
LISTEN 4
ESTABLISHED 3
3.统计当前系统22端口连接状态信息。<当前时实状态ss>
[root@test ~]# ss -an|awk '/:22/{tcp[$2]++} END {for(i in tcp){print i,tcp[i]}}'
LISTEN 2
ESTAB 3
4.统计当前访问的每个IP的数量<当前时实状态 netstat,ss>
[root@sfy ~]# ss -an|awk '$5~/:443/{ips[$5]++} END {for(i in ips){print i,ips[i]}}'
172.17.188.85:443 324
*:443 1
2.Awk数组示例
Nginx日志分析,日志格式如下:
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
52.55.21.59 - - [25/Jan/2018:14:55:36 +0800] "GET /feed/ HTTP/1.1" 404 162 "https://www.buyao007.icu/" "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; de) Presto/2.9.168 Version/11.52" "-"
1.仅使用awk统计一天内访问最多的10个IP
[root@test ~/2021-07-02]# cat access_ip.awk
{
ip[$1]++
}
END{
for (i=30000;i>0;i--){
for ( b in ip ){
if (i==ip[b]){
printf "%-15s%-10s%s\n",b,"出现的次数为",ip[b]
c++
break
}
# if (c==10){exit}
}
if (c==10){break}
}
}
[root@test ~/2021-07-02]# awk -f access_ip.awk access.log
58.220.223.62 出现的次数为 12049
112.64.171.98 出现的次数为 10856
114.83.184.139 出现的次数为 1982
117.136.66.10 出现的次数为 1662
115.29.245.13 出现的次数为 1318
223.104.5.197 出现的次数为 961
116.216.0.60 出现的次数为 957
180.111.48.14 出现的次数为 939
223.104.5.202 出现的次数为 871
223.104.4.139 出现的次数为 869
2.统计访问大于10000次的IP
[root@test ~/2021-07-02]# cat access_ip\>10000.awk
{
ip[$1]++
}
END{
for (i=30000;i>=10000;i--){
for ( b in ip ){
if (i==ip[b]){
printf "%-15s%-10s%s\n",b,"出现的次数为",ip[b]
c++
break
}
# if (c==10){exit}
}
if (c==10){break}
}
}
[root@test ~/2021-07-02]# awk -f access_ip\>10000.awk access.log
58.220.223.62 出现的次数为 12049
112.64.171.98 出现的次数为 10856
3.统计访问最多的10个页面($request top 10)
[root@test ~/2021-07-02]# cat ngx_request_top_10.awk
{
ip[$7]++
}
END{
for (i=5000;i>=1;i--){
for ( b in ip ){
if (i==ip[b]){
printf "%-15s%-10s%s\n",b,"出现的次数为",ip[b]
c++
break
}
}
if (c==10){break}
}
}
[root@test ~]# awk -f ngx_request_top_10.awk access.log
/online/api/mc/cart/new/getCart.json出现的次数为 4838
/online/api/mc/sys/nowTime.json出现的次数为 3859
/online/mc/crm/integration/ance.json出现的次数为 2445
/online/api/mc/cart/save.json出现的次数为 1872
/ccbs/global/ontextPath.jsp出现的次数为 1797
/mobile/ account/tpl/footerTpl.html出现的次数为 1548
/online/aguage=zh_CNECIAL_MENU出现的次数为 1344
/mobile/tpl/productCategoryTpl.html出现的次数为 912
/ccbs/ plugins/images/loading.gif出现的次数为 838
/favicon.ico 出现的次数为 810
4.统计每个URL访问内容总大小($body_bytes_sent)
[root@test ~/2021-07-02]# cat ngx_request_body.awk
{
ip[$7]=$10+ip[$7]
}
END{
for ( b in ip ){
printf "%-15s%-10s%s\n",b,"大小为",ip[b]/1024/1024"MB"
}
}
[root@test ~/2021-07-02]# awk -f ngx_request_body.awk access.log|sort -k2 -nr|head
/mobe/the/odu/home/images/11/03.png大小为 109.826MB
/mobe/the/odu/home/images/11/04.png大小为 98.5827MB
/mobe/the/odu/home/images/11/02.png大小为 98.2485MB
/mobe/the/odu/home/images/11/05.png大小为 78.9912MB
/online/images/pro _90601.png大小为 63.8436MB
/online/odbproduct_90602.png大小为 63.4223MB
/online/oges/prodduct_90600.png大小为 62.222MB
/mobe/the/odu/11/4.png?v=21020大小为 59.6233MB
/mobe/the/odu/home/images/11/2.png大小为 59.1334MB
/mobe/the/odu/home/images/11/01.png大小为 57.6912MB
5.统计11月22日,每个IP访问状态码数量($status)
[root@test ~/2021-07-02]# cat ngx_ip_code.awk
$3~/22/Nov/
{
ip_code[$1" ""状态码"" "$9]++
}
END{
for ( item in ip_code ) {
print item,ip_code[item]
}
}
[root@test ~/2021-07-02]# awk -f ngx_ip_code.awk access.log |sort -k4 -rn|head|sort -k1.1,1.13 -nrk4|column -t
223.104.5.197 状态码 200 958
117.136.66.10 状态码 200 1655
116.216.0.60 状态码 200 951
114.83.184.139 状态码 200 1975
112.64.171.98 状态码 200 5803
112.64.171.98 状态码 304 3792
112.64.171.98 状态码 404 1148
58.220.223.62 状态码 304 6412
58.220.223.62 状态码 200 4421
58.220.223.62 状态码 404 956
6.统计每个ip访问状态码为404及出现的次数($status)
[root@test ~/2021-07-02]# cat ngx_status_top_404.awk
$3~/22/Nov/
{
ip_code[$1" ""状态码"" "$9]++
}
END{
for ( item in ip_code ) {
if (item~/404$/){
print item,ip_code[item]
}
}
}
[root@test ~/2021-07-02]# awk -f ngx_status_top_404.awk access.log|sort -nrk4|head|sort -k1.1,1.13 -r -nrk4|column -t
219.145.184.196 状态码 404 51
182.34.127.126 状态码 404 51
175.25.171.26 状态码 404 46
140.206.89.150 状态码 404 45
115.29.245.13 状态码 404 89
114.141.164.19 状态码 404 51
114.111.166.19 状态码 404 60
112.64.171.98 状态码 404 1148
58.220.223.62 状态码 404 956
10.8.4.6 状态码 404 57
7.统计各种状态码数量
统计状态码出现的次数
[root@test ~/2021-07-02]# awk '{code[$9]++} END {for(i in code){print i,code[i]}}' access.log
408 13
301 146
302 789
304 18712
400 242
403 37
200 142666
413 50
404 3863
500 7
499 418
[root@test ~]# awk '{if ($9>=200 && $9<300) {j++}\
else if ($9>=300 && $9<400) {k++}\
else if ($9>=400 && $9<500) {n++}\
else if($9>=500) {p++}}\
END{\
printf "%s%s%s%s%s\n",j"\n",k"\n",n"\n",p"\n",i+j+k+n+p}' access.log
3.Awk数组案例
1.模拟生产环境数据脚本
[root@test ~/2021-07-02]# cat insert.sh
!/bin/bash
function create_random()
{
min=$1
max=$(($2-$min+1))
num=$(date +%s%N)
echo $(($num%$max+$min))
}
INDEX=1
i=1
while [ $i -le 3000 ]
do
for user in liu guan zhang sun fuyang
do
COUNT=$RANDOM
NUM1=`create_random 1 $COUNT`
NUM2=`expr $COUNT - $NUM1`
echo "`date '+%Y-%m-%d %H:%M:%S'` $INDEX user: $user insert $COUNT records into datebase:product table:detail, insert $NUM1 records successfully,failed $NUM2 records" >> ./db.log.`date +%Y%m%d`
INDEX=`expr $INDEX + 1`
done
i=$[$i+1]
done
数据格式如下:
2021-07-04 16:11:15 5000 user: fuyang insert 4704 records into datebase:product table:detail, insert 1243 records successfully,failed 3461 records
需求1:统计每个人分别插入了多少条records进数据库
[root@test ~/2021-07-02]# cat db1.awk
BEGIN {
printf "%-20s%-20s\n","User","Total records"
}
{
success[$5]+=$7
#success[$5]=success[$5]+$7
}
END {
for (u in success)
printf "%-20s%-20d\n",u,success[u]
}
[root@test ~/2021-07-02]# awk -f db1.awk db.log.20210704
User Total records
guan 16355784
liu 16838450
fuyang 16517171
sun 16383219
zhang 16444839
需求2:统计每个人分别插入成功了多少record,失败了多少record
[root@test ~/2021-07-02]# cat db2.awk
BEGIN {
printf "%-20s%-20s%-20s\n","User","Success","Failed"
}
{
success[$5]+=$13
failed[$5]+=$16
}
END {
for (u in success)
printf "%-20s%-20d%-20d\n",u,success[u],failed[u]
}
[root@test ~/2021-07-02]# awk -f db2.awk db.log.20210704
User Success Failed
guan 8090983 8264801
liu 7972402 8866048
fuyang 8281769 8235402
sun 8102644 8280575
zhang 8408980 8035859
需求3:将需求1和需求2结合起来,一起输出,输出每个人分别插入多少条数据,多少成功,多少失败,并且要格式化输出,加上标题
[root@test ~/2021-07-02]# cat db3.awk
BEGIN {
printf "%-10s%-10s%-10s%-10s\n","User","Total","Success","Failed"
}
{
success[$5]+=$13
failed[$5]+=$16
}
END {
for (u in success)
printf "%-10s%-10s%-10d%-10d\n",u,success[u]+failed[u],success[u],failed[u]
}
[root@test ~/2021-07-02]# awk -f db3.awk db.log.20210704
User Total Success Failed
guan 16355784 8090983 8264801
liu 16838450 7972402 8866048
fuyang 16517171 8281769 8235402
sun 16383219 8102644 8280575
zhang 16444839 8408980 8035859
需求4:在例子3的基础上,加上结尾,统计全部插入记录数,成功记录数,失败记录数。
[root@test ~/2021-07-02]# cat db4.awk
BEGIN {
printf "%-10s%-10s%-10s%-10s\n","User","Total","Success","Failed"
}
{
total[$5]+=$7
success[$5]+=$13
failed[$5]+=$16
#在原始数据进行统计累计
total_sum+=$7
success_sum+=$13
failed_sum+=$16
}
END {
for (u in success) {
printf "%-10s%-10s%-10d%-10d\n",u,total[u],success[u],failed[u]
}
printf "%-10s%-10s%-10d%-10d\n","total",total_sum,success_sum,failed_sum
}
[root@test ~/2021-07-02]# awk -f db4.awk db.log.20210704
User Total Success Failed
guan 16355784 8090983 8264801
liu 16838450 7972402 8866048
fuyang 16517171 8281769 8235402
sun 16383219 8102644 8280575
zhang 16444839 8408980 8035859
total 82539463 40856778 41682685