R-Retrieve Data (step 1)

简介: R is a data analysis and visualization platform.

[I]Data Structure

1.vector

  • numeric
> a<-c(1,2,3)  
> a
[1] 1 2 3
  • character
> a<-c("1","2","3")   
> a
[1] "1" "2" "3"
  • logicals
> a<-c(FALSE,TRUE)
> a
[1] FALSE  TRUE
  • scalar
> a<-FALSE
> a
[1] FALSE

2.matrix

  • create
#base
> mymatrix<-matrix(1:9,nrow=3,ncol=3)
> mymatrix
     [,1] [,2] [,3]
[1,]    1    4    7
[2,]    2    5    8
[3,]    3    6    9
#labeling rows/cols
> cells<-c(1:9)
> rnames<-c("R1","R2","R3")
> cnames<-c("C1","C2","C3")
> mymatrix<-matrix(cells,nrow=3,ncol=3,byrow=TRUE,dimnames=list(rnames,cnames))    #fill according to row
> mymatrix
   C1 C2 C3
R1  1  2  3
R2  4  5  6
R3  7  8  9
> mymatrix<-matrix(cells,nrow=3,ncol=3,byrow=FALSE,dimnames=list(rnames,cnames))    #fill according to column
> mymatrix
   C1 C2 C3
R1  1  4  7
R2  2  5  8
R3  3  6  9
  • index
> mymatrix[2,]
C1 C2 C3 
 2  5  8 
> mymatrix[,2]
R1 R2 R3 
 4  5  6 
> mymatrix[2,2]
[1] 5
> mymatrix[2,c(1,2)]
C1 C2 
 2  5 

3.array

> dim1<-c("A1","A2","A3")
> dim2<-c("B1","B2","B3")
> dim3<-c("C1","C2","C3")
> myarray<-array(1:27,c(3,3,3),dimnames=list(dim1,dim2,dim3))
> myarray
, , C1

   B1 B2 B3
A1  1  4  7
A2  2  5  8
A3  3  6  9

, , C2

   B1 B2 B3
A1 10 13 16
A2 11 14 17
A3 12 15 18

, , C3

   B1 B2 B3
A1 19 22 25
A2 20 23 26
A3 21 24 27

4.data frame

  • create
> ID<-c(1,2,3,4)
> age<-c(27,26,32,54)
> sex<-c("Male","Female","Female","Female")
> music<-c("Type2","Type1","Type1","Type1")
> status<-c("Excellent","Improved","Poor","Excellent")
> singerdata<-data.frame(ID,age,sex,music,status)
> singerdata
  ID age    sex music    status
1  1  27   Male Type2 Excellent
2  2  26 Female Type1  Improved
3  3  32 Female Type1      Poor
4  4  54 Female Type1 Excellent
  • select
#base
> singerdata[1:2]
  ID age
1  1  27
2  2  26
3  3  32
4  4  54
> singerdata[c("music","status")]
  music    status
1 Type2 Excellent
2 Type1  Improved
3 Type1      Poor
4 Type1 Excellent
> table(singerdata$music,singerdata$status)
       
        Excellent Improved Poor
  Type1         1        1    1
  Type2         1        0    0
#external data:e.g.mtcars
> attach(mtcars)    #No object of the same name 
> summary(mpg)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  10.40   15.43   19.20   20.09   22.80   33.90 
> plot(mpg,disp)
> detach(mtcars)
> with(mtcars,{     #object of the same name
+  nokeepstats<-summary(mpg)
+  keepstats<<-summary(mpg)
+ })
> nokeepstats    #internal call
Error: object 'nokeepstats' not found
> keepstats    #external call
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  10.40   15.43   19.20   20.09   22.80   33.90 

5.factor

> ID<-c(1,2,3,4)
> age<-c(27,26,32,54)
> sex<-c(1,2,2,2)
> music<-c("Type1","Type2","Type1","Type1")
> status<-c("Excellent","Improved","Poor","Excellent")
> sex<-factor(sex,levels=c(1,2),labels=c("Male","Female"))    #label replace
> music<-factor(music)    #default order:A-Z
> status<-factor(status,order=TRUE,levels=c("Poor","Improved","Excellent"))    #customize order
> singerdata<-data.frame(ID,age,sex,music,status)
> str(singerdata)
'data.frame':   4 obs. of  5 variables:
 $ ID    : num  1 2 3 4
 $ age   : num  27 26 32 54
 $ sex   : Factor w/ 2 levels "Male","Female": 1 2 2 2
 $ music : Factor w/ 2 levels "Type1","Type2": 1 2 1 1
 $ status: Ord.factor w/ 3 levels "Poor"<"Improved"<..: 3 2 1 3
> summary(singerdata)
       ID            age            sex      music         status 
 Min.   :1.00   Min.   :26.00   Male  :1   Type1:3   Poor     :1  
 1st Qu.:1.75   1st Qu.:26.75   Female:3   Type2:1   Improved :1  
 Median :2.50   Median :29.50                        Excellent:2  
 Mean   :2.50   Mean   :34.75                                     
 3rd Qu.:3.25   3rd Qu.:37.50                                     
 Max.   :4.00   Max.   :54.00           

6.list

  • create
> tit<-"singer list"
> name<-c("Hua","Deng","Ga","Wang")
> age<-c(27,26,32,54)
> position<-matrix(1:9,nrow=3)
> mylist<-list(title=tit,name,ages=age,position)
> mylist
$title
[1] "singer list"

[[2]]
[1] "Hua"  "Deng" "Ga"   "Wang"

$ages
[1] 27 26 32 54

[[4]]
     [,1] [,2] [,3]
[1,]    1    4    7
[2,]    2    5    8
[3,]    3    6    9
  • index
> mylist[[2]]
[1] "Hua"  "Deng" "Ga"   "Wang"
> mylist[["ages"]]
[1] 27 26 32 54

[II]Import Data

1.keyboard

  • text editor
> mydata<-data.frame(age=numeric(0),sex=character(0),order=numeric(0))
> mydata<-edit(mydata)    #reopen:> mydata<-edit(mydata)

result:
importdata_keyborad

  • embedded
> mydatatxt<-"
+ age sex order
+ 27 Male 1
+ 26 Female 2
+ 32 Female 3
+ 54 Female 4
+ "
> mydata<-read.table(header=TRUE,text=mydatatxt)
> mydata
  age    sex order
1  27   Male     1
2  26 Female     2
3  32 Female     3
4  54 Female     4

2.delimited text file

singerdata.csv
singerdata

> music<-read.table("singerdata.csv",header=TRUE,row.names="ID",sep=",",colClasses=c("character","character","numeric","character","numeric"))
> music
  name age    sex music
1  Hua  27   Male   100
2 Deng  26 Female    95
3   Ga  32 Female    NA
4 Wang  54 Female    93
> str(music)
'data.frame':   4 obs. of  4 variables:
 $ name : chr  "Hua" "Deng" "Ga" "Wang"
 $ age  : num  27 26 32 54
 $ sex  : chr  "Male" "Female" "Female" "Female"
 $ music: num  100 95 NA 93

3.Excel

> install.packages("xlsx")
> install.packages("xlsxjars")
> install.packages("rJava") 
> library(xlsx)
> workbook<-"c:/myworkbook.xlsx"
> mydataframe<-read.xlsx(workbook,1)

4.XML

RSXML

5.web page

Webscraping using readLines and RCurl

6.SPSS

> install.packages("Hmisc")
> library(Hmisc)
> mydataframe<-spss.get("mydata.sav",use.value.labels=TRUE)

7.SAS

#installed SAS
> install.packages("Hmisc")  
> library(Hmisc)
> datadir<-"C:/data"
> sasexe<-"C:/Program Files/SASHome/SASFoundation/9.4/sas.exe"
> mydata<-sas.get(libraryName=datadir,member="clients",sasprog=sasexe)
#not installed SAS
> install.packages("sas7bdat")  
> library(sas7bdat)
> mydata<-read.sas7bdat("C:/mydata/clients.sas7bdat")

8.Stata

> install.packages("foreign")
> library(foreign)
> mydataframe<-read.dta("mydata.dta")

9.NetCDF

> install.packages("ncdf4")
> library(ncdf4)
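> nc<-nc_open("mydata.nc")    #open the NetCDF file first
> myarray<-ncvar_get(nc,varid)    #read the variable's data (ncatt_get only reads attributes)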

10.HDF5

> source("http://bioconductor.org/biocLite.R")
> biocLite("rhdf5")
> library(rhdf5)
> h5ls("C:/mydata")

11.data management system

  • ODBC API
> install.packages("RODBC")
> library(RODBC)
> myconn<-odbcConnect("mydsn",uid="xw",pwd="xinwenfan")
> crimedat<-sqlFetch(myconn,"Crime")
> pundat<-sqlQuery(myconn,"select* from Punishment")
> close(myconn)

12.Stat/Transfer

stattransfer


[III]Object Function

> length(object)    #the number of elements/ingredients in the object
> dim(object)    #dimension of object
> str(object)    #structure of object
> class(object)    #class of object
> mode(object)    #storage mode of object
> names(object)    #name of ingredients
> c(object,object,...)    #binding objects into one vector
> cbind(object,object,...)    #binding objects by column
> rbind(object,object,...)    #binding objects by row
> object    #printing object
> head(object)    #listing the beginning part
> tail(object)    #listing the ending part
> ls()    #list the objects in the current workspace

----

[IV]Generate dynamic reports by R and LaTeX

1.install LaTeX

Linux:TeXLive
Windows:MiKTeX
Mac:MacTeX

2.install knitr

> install.packages("knitr")

3.install data package

#e.g.multcomp
> install.packages("multcomp")

4.Create File.Rnw

#e.g. drugs.Rnw

5.process data

> library(knitr)
> knit("drugs.Rnw")    #drugs.tex
> knit2pdf("drugs.Rnw")    #drugs.pdf

END!

目录
相关文章
|
3天前
|
机器学习/深度学习 人工智能
【CatBoost报错解决】CatBoostError: Bad value for num feature[non default doc idx=0,feature idx=19]=
【CatBoost报错解决】CatBoostError: Bad value for num feature[non default doc idx=0,feature idx=19]=
You have to specify ‘-keep‘ options for the shrinking step
You have to specify ‘-keep‘ options for the shrinking step
53 0
解决办法:RuntimeError: dictionary changed size during iteration
解决办法:RuntimeError: dictionary changed size during iteration
89 0
成功解决ValueError: With n_samples=0, test_size=0.3 and train_size=None, the resulting train set will be
成功解决ValueError: With n_samples=0, test_size=0.3 and train_size=None, the resulting train set will be
成功解决ValueError: With n_samples=0, test_size=0.3 and train_size=None, the resulting train set will be
|
SQL Java 数据库连接
JPA异常:Batch update returned unexpected row count from update [0]; actual row count: 0; expected: 1
JPA异常:Batch update returned unexpected row count from update [0]; actual row count: 0; expected: 1
1769 0
Step by step to create time dependent view
Step1: Create your transparent table as usual. The only special task is to include two additional fields for start and end date. Use predefined VIM_BEGDA and VIM_ENDDA.
93 0
Step by step to create time dependent view
D3 dataset - what is usage of key function in data
Created by Wang, Jerry, last modified on Sep 21, 2015
101 0
D3 dataset - what is usage of key function in data
OPA 20 - deliberately generate an error
Created by Wang, Jerry, last modified on Nov 08, 2015
103 0
OPA 20 - deliberately generate an error
|
开发工具
R-Organize Data(step 2)
R is a data analysis and visualization platform.
914 0
|
资源调度
R-Description Data(step 3)
R is a data analysis and visualization platform.
1065 0