TechSapphire Saturday, 2024-04-27, 1:06 PM
Site menu
Login form
News
Play Games
  • Deep Freeze
  • Ice Slide
  • Gyroball
  • Fat Fish
  • Bush Royal Rampage
  • The apply, lapply, sapply, vapply, tapply, ftable, xtabs and aggregate functions are very important for data transformation in R. They are the basic data-processing functions. The script used in the lecture above is given below:

    #Yogesh Mehla
    #yogesh.mehla@gmail.com
    #+91-9023262520

    #apply, lapply, sapply, vapply, tapply, ftable, xtabs, aggregate
    #generating matrix (cross-tab) reports

    # Load the cricket data set used by the rest of this script and show it.
    # NOTE(review): the path is machine-specific; point it at your own copy of
    # Rpractice.csv.
    CricData <- read.csv("G:/R/Rpractice.csv")
    CricData

    # Print the class of every column as loaded. seq_along() is used instead of
    # 1:length() so that a data frame with no columns is skipped cleanly rather
    # than iterating over c(1, 0).
    for (i in seq_along(CricData)) {
      print(class(CricData[[i]]))
    }

    # Store columns 2, 3, 4 and 7 as character vectors.
    for (i in c(2, 3, 4, 7)) {
      CricData[[i]] <- as.character(CricData[[i]])
    }

    # Print the classes again to confirm the conversion took effect.
    for (i in seq_along(CricData)) {
      print(class(CricData[[i]]))
    }

    # For every column, print its mean when it is numeric and its type
    # otherwise.
    for (i in seq_along(CricData)) {
      column <- CricData[[i]]
      # The name must come from the data frame itself: an extracted column is a
      # plain vector, so colnames() on it is NULL and the name would be silently
      # dropped from the message.
      column_name <- colnames(CricData)[i]
      # is.numeric() covers both integer and double columns and, unlike
      # class(x) == "...", still yields a single TRUE/FALSE when a column has
      # more than one class.
      if (is.numeric(column)) {
        print(paste("mean of column ", column_name, " is ", mean(column)))
      } else {
        print(paste(
          "Column ", column_name, " is of type",
          paste(class(column), collapse = "/")
        ))
      }
    }

    # apply() calls a function on every row (MARGIN = 1) or every column
    # (MARGIN = 2) of a matrix-like object.
    apply(mtcars, 2, class)

    apply(mtcars, 2, mean)

    # The callback is passed as an anonymous function. Writing
    # `name <- function(x) ...` in the argument list would also create a global
    # variable called `name` as a side effect, so no assignment is used here.
    apply(mtcars, 2, function(x) {
      print(paste("mean is ", mean(x)))
    })


    # apply() converts a data frame to a matrix before calling FUN (see ?apply),
    # so the classes reported here are those of the matrix columns, which may
    # differ from the classes of the original data frame columns.
    apply(X = CricData, MARGIN = 2, FUN = class)

    # Same call restricted to columns 5 and 6.
    apply(X = CricData[, 5:6], MARGIN = 2, FUN = class)

    # sapply() always iterates over the elements of its input (for a data frame,
    # its columns); it has no MARGIN parameter.
    # NOTE(review): mean() is called on every column here, including the ones
    # converted to character earlier in the script, for which it warns and
    # returns NA. The next call handles those columns explicitly.
    sapply(CricData, function(x) {
      print(paste("mean is ", mean(x)))
    })

    # Iterate over the column names rather than the columns themselves: inside
    # the callback a column is a bare vector, so colnames(x) is NULL and the
    # name cannot be recovered from it.
    sapply(names(CricData), function(column_name) {
      x <- CricData[[column_name]]
      if (is.numeric(x)) {
        print(paste("mean of column ", column_name, " is ", mean(x)))
      } else {
        print(paste(
          "Column ", column_name, " is of type",
          paste(class(x), collapse = "/")
        ))
      }
    })

    # Column means of columns 5 and 6.
    apply(X = CricData[, 5:6], MARGIN = 2, FUN = mean)

    # Open CricData in the interactive data editor.
    # NOTE(review): presumably used in the lecture to blank out a few values so
    # that na.rm makes a visible difference below - confirm.
    fix(CricData)

    # Same means, this time ignoring missing values.
    apply(
      X = CricData[, 5:6],
      MARGIN = 2,
      FUN = function(column) mean(column, na.rm = TRUE)
    )

    # lapply() also works column by column (no MARGIN parameter) and always
    # returns its results as a list.
    lapply(
      X = CricData[5:6],
      FUN = function(column) mean(column, na.rm = TRUE)
    )

    # unlist() flattens that list into a named vector.
    unlist(
      lapply(
        X = CricData[5:6],
        FUN = function(column) mean(column, na.rm = TRUE)
      )
    )

    CricData$Franchise <- as.character(CricData$Franchise)

    unique(CricData$Franchise)

    # One data frame per franchise, in order of first appearance. The list is
    # unnamed, so elements can only be reached by position.
    OutputList <- lapply(unique(CricData$Franchise), function(x) {
      CricData[CricData$Franchise == x, ]
    })
    OutputList[1]

    # Same split, but the list is named after the franchises so that a single
    # franchise can be picked out with `$`.
    UniqueFranchise <- unique(CricData$Franchise)
    OutputList <- lapply(
      setNames(UniqueFranchise, UniqueFranchise),
      function(x) {
        CricData[CricData$Franchise == x, ]
      }
    )
    OutputList$Jaipur

    # split() does a similar job in one call.
    OutputSplit <- split(CricData, CricData$Franchise)
    OutputSplit$Delhi

    class(OutputList)
    class(OutputSplit)

    unsplit(OutputSplit, CricData$Franchise)
    # unsplit() pairs the pieces with the groups by position, in the order
    # split() produces them. OutputList is in first-appearance order instead,
    # so it is reordered by name to match OutputSplit before reassembling;
    # otherwise rows could be written back under the wrong franchise.
    unsplit(OutputList[names(OutputSplit)], CricData$Franchise)

    # vapply() is like sapply(), except that the expected type and length of
    # each result must be declared up front through FUN.VALUE.
    vapply(X = CricData[5:6], FUN = mean, FUN.VALUE = numeric(1))

    # tapply() groups the values of X by the INDEX factor(s) and applies FUN to
    # each group; the result is an array.
    tapply(X = CricData$Price, INDEX = CricData$Specialty, FUN = sum)

    # Two grouping factors give a two-way table.
    tapply(
      X = CricData$Price,
      INDEX = list(CricData$Franchise, CricData$Specialty),
      FUN = sum
    )

    # Naming the grouping factors labels the dimensions of the result.
    tapply(
      X = CricData$Price,
      INDEX = list(
        Franchise = CricData$Franchise,
        Specialty = CricData$Specialty
      ),
      FUN = sum
    )

    # Rows sorted by franchise.
    CricData[
      order(CricData$Franchise),
    ]

    # Rows sorted by franchise, ties broken by specialty.
    CricData[
      order(CricData$Franchise, CricData$Specialty),
    ]

    # Franchise descending, specialty ascending: the franchise is turned into
    # its integer factor codes and negated so that a single ascending order()
    # call sorts that key in reverse.
    CricData[
      order(-xtfrm(factor(CricData$Franchise)), CricData$Specialty),
    ]

    # tapply() with three grouping factors aggregates the data into a
    # 3-dimensional array: Country x Specialty x GFranchise.
    outputTapply <- tapply(
      CricData$Price,
      list(
        Country = CricData$Country,
        Specialty = CricData$Specialty,
        GFranchise = CricData$Franchise
      ),
      sum
    )
    outputTapply
    # NOTE(review): the hard-coded index ranges below assume the data holds at
    # least that many countries and specialties - confirm against the CSV.
    outputTapply[1:10, 1:5, "Mumbai"]

    dt <- as.data.frame(outputTapply[1:8, 1:5, "Mumbai"])
    dt

    (outputTapply[, , 3])

    dim(outputTapply)[3]

    # One Country x Specialty slice per franchise, reached by position.
    d <- lapply(seq_len(dim(outputTapply)[3]), function(x) {
      outputTapply[, , x]
    })
    d

    # Same slices, named after the franchises. The names are taken from the
    # array's own third dimension: that dimension follows the sorted factor
    # levels, not the first-appearance order of unique(CricData$Franchise), so
    # labelling by the latter could attach the wrong franchise name to a slice.
    d <- lapply(setNames(nm = dimnames(outputTapply)[[3]]), function(x) {
      outputTapply[, , x]
    })
    d$Bangalore

    # ftable() flattens a multi-dimensional table into a two-dimensional layout.
    ftable(outputTapply)


    # xtabs() builds a cross-tabulation of sums from a formula. Columns are
    # referenced by bare name and looked up in `data`, which keeps the dimension
    # labels readable ("Country" rather than "CricData$Country").
    OutputxTab <- xtabs(
      Price ~ Country + Franchise + Specialty,
      data = CricData
    )
    OutputxTab
    # The equivalent table built with tapply(), for comparison.
    OutputtApplyXTab <- tapply(
      CricData$Price,
      list(
        Country = CricData$Country,
        GFranchise = CricData$Franchise,
        Specialty = CricData$Specialty
      ),
      sum
    )
    ftable(OutputtApplyXTab)


    ftable(OutputxTab)
    # aggregate() returns one row per group as a data frame. Naming the `by`
    # list labels the grouping columns instead of Group.1 / Group.2.
    aggregate(
      CricData$Price,
      by = list(
        Franchise = CricData$Franchise,
        Specialty = CricData$Specialty
      ),
      FUN = mean,
      na.rm = TRUE
    )
    # Formula interface: total price per country, franchise and specialty.
    aggregate(
      Price ~ Country + Franchise + Specialty,
      data = CricData,
      FUN = sum
    )

    click to download cricdata
     

    Categories
    Programming [27]
    Tips for programming
    Security [2]
    Security Tips
    Google [1]
    Use Google faster than ever before
    Project [14]
    HTML [2]
    Electronics [0]
    Data Structure [0]
    Database [16]
    SQL SERVER
    SSRS [1]
    Sql Server Reporting Services
    Copyright MyCorp © 2024