options( java.parameters = "-Xmx8g" )

# Packages this script depends on.
.lib <- c("caret", "RSNNS", "farff")
# installed.packages() returns a matrix with one row per package; compare
# against its row names only, so that text in other columns (Depends,
# Imports, ...) can never be mistaken for an installed package.
.inst <- .lib %in% rownames(installed.packages())
if (any(!.inst)) {
  install.packages(
    .lib[!.inst],
    repos = c("http://rstudio.org/_packages", "http://cran.rstudio.com")
  )
}
# require() returns FALSE instead of raising an error, so check the results
# and stop here rather than failing later with an unrelated message.
.loaded <- lapply(.lib, require, character.only = TRUE)
if (!all(unlist(.loaded))) {
  stop(
    "Could not load package(s): ",
    paste(.lib[!unlist(.loaded)], collapse = ", "),
    call. = FALSE
  )
}
# Echo the per-package load status, as the original top-level lapply() did.
.loaded
## [[1]]
## [1] TRUE
## 
## [[2]]
## [1] TRUE
## 
## [[3]]
## [1] TRUE
# Fix the random number generator state for reproducibility.
set.seed(288)
# Trailing command-line arguments; not used by the code visible in this part
# of the script.
args <- commandArgs(trailingOnly = TRUE)
# `pattern` is a regular expression, not a shell glob: match names that end
# in the literal extension ".arff".
datasets <- list.files(".", pattern = "\\.arff$")
print(datasets)
##  [1] "ada_agnostic.arff"                    
##  [2] "analcatdata_authorship.arff"          
##  [3] "bank-marketing.arff"                  
##  [4] "blood-transfusion-service-center.arff"
##  [5] "climate-model-simulation-crashes.arff"
##  [6] "credit-a.arff"                        
##  [7] "credit-g.arff"                        
##  [8] "cylinder-bands.arff"                  
##  [9] "diabetes.arff"                        
## [10] "haberman.arff"                        
## [11] "heart-statlog.arff"                   
## [12] "hepatitis.arff"                       
## [13] "hill-valley.arff"                     
## [14] "ilpd.arff"                            
## [15] "kc2.arff"                             
## [16] "liver-disorders.arff"                 
## [17] "madelon.arff"                         
## [18] "mfeat-morphological.arff"             
## [19] "one-hundred-plants-shape.arff"        
## [20] "ozone-level-8hr.arff"                 
## [21] "ozone.arff"                           
## [22] "pc1.arff"                             
## [23] "profb.arff"                           
## [24] "qsar-biodeg.arff"                     
## [25] "speeddating.arff"                     
## [26] "vehicle.arff"                         
## [27] "wdbc.arff"                            
## [28] "wilt.arff"
# For every dataset found in the working directory, fit two MLP classifiers
# (3 and 5 hidden units) on the full data and print the accuracy obtained on
# that same training data. A dataset/model pair that cannot be trained or
# scored is reported as "-> ERROR" and the loop continues.
for (id in seq_along(datasets)) {
  # Seed once per dataset (not once per model) so that the random stream seen
  # by each fit is the same as when the two fits were written out one after
  # the other.
  set.seed(288)
  selected <- id
  datos <- readARFF(datasets[selected])
  # The last column is used as the target, whatever it was called in the file.
  colnames(datos)[ncol(datos)] <- "Class"
  datos$Class <- as.factor(datos$Class)

  # For caret's "mlp" method, `size` is the number of units in the hidden
  # layer (not the number of hidden layers).
  for (hidden_units in c(3, 5)) {
    technique <- paste0("mlp_", hidden_units)

    # trControl method "none": a single fit on all rows, no resampling.
    model <- tryCatch(
      caret::train(
        Class ~ .,
        data = datos, method = "mlp", preProc = c("center", "scale"),
        tuneGrid = data.frame(size = hidden_units),
        trControl = caret::trainControl(method = "none")
      ),
      error = function(e) {
        # Surface the reason on stderr instead of discarding it.
        message(
          technique, " on ", datasets[selected], " failed to train: ",
          conditionMessage(e)
        )
        NULL
      }
    )

    # Predict on the training rows, without the target column. A failure here
    # is reported as ERROR as well, instead of handing a dummy logical vector
    # to confusionMatrix() and crashing the whole loop.
    preds <- if (is.null(model)) {
      NULL
    } else {
      tryCatch(
        predict(model, newdata = datos[-ncol(datos)]),
        error = function(e) {
          message(
            technique, " on ", datasets[selected], " failed to predict: ",
            conditionMessage(e)
          )
          NULL
        }
      )
    }

    if (is.null(preds)) {
      print(paste("Technique: ", technique, " - Dataset: ", datasets[selected], "-> ERROR"))
    } else {
      cm <- caret::confusionMatrix(preds, datos$Class)
      overall.accuracy <- cm$overall["Accuracy"]
      print(paste(" Technique: ", technique, " - Dataset: ", datasets[selected], "-> Train ACC: ", overall.accuracy))
    }
  }
}
## [1] " Technique:  mlp_3  - Dataset:  ada_agnostic.arff -> Train ACC:  0.860806663743972"
## [1] " Technique:  mlp_5  - Dataset:  ada_agnostic.arff -> Train ACC:  0.882288469969312"
## [1] " Technique:  mlp_3  - Dataset:  analcatdata_authorship.arff -> Train ACC:  1"
## [1] " Technique:  mlp_5  - Dataset:  analcatdata_authorship.arff -> Train ACC:  1"
## [1] " Technique:  mlp_3  - Dataset:  bank-marketing.arff -> Train ACC:  0.910928756276127"
## [1] " Technique:  mlp_5  - Dataset:  bank-marketing.arff -> Train ACC:  0.911725022671474"
## [1] " Technique:  mlp_3  - Dataset:  blood-transfusion-service-center.arff -> Train ACC:  0.806149732620321"
## [1] " Technique:  mlp_5  - Dataset:  blood-transfusion-service-center.arff -> Train ACC:  0.802139037433155"
## [1] " Technique:  mlp_3  - Dataset:  climate-model-simulation-crashes.arff -> Train ACC:  0.985185185185185"
## [1] " Technique:  mlp_5  - Dataset:  climate-model-simulation-crashes.arff -> Train ACC:  0.987037037037037"
## [1] "Technique:  mlp_3  - Dataset:  credit-a.arff -> ERROR"
## [1] "Technique:  mlp_5  - Dataset:  credit-a.arff -> ERROR"
## [1] " Technique:  mlp_3  - Dataset:  credit-g.arff -> Train ACC:  0.89"
## [1] " Technique:  mlp_5  - Dataset:  credit-g.arff -> Train ACC:  0.942"
## [1] "Technique:  mlp_3  - Dataset:  cylinder-bands.arff -> ERROR"
## [1] "Technique:  mlp_5  - Dataset:  cylinder-bands.arff -> ERROR"
## [1] " Technique:  mlp_3  - Dataset:  diabetes.arff -> Train ACC:  0.798177083333333"
## [1] " Technique:  mlp_5  - Dataset:  diabetes.arff -> Train ACC:  0.81640625"
## [1] " Technique:  mlp_3  - Dataset:  haberman.arff -> Train ACC:  0.823529411764706"
## [1] " Technique:  mlp_5  - Dataset:  haberman.arff -> Train ACC:  0.833333333333333"
## [1] " Technique:  mlp_3  - Dataset:  heart-statlog.arff -> Train ACC:  0.940740740740741"
## [1] " Technique:  mlp_5  - Dataset:  heart-statlog.arff -> Train ACC:  0.962962962962963"
## [1] "Technique:  mlp_3  - Dataset:  hepatitis.arff -> ERROR"
## [1] "Technique:  mlp_5  - Dataset:  hepatitis.arff -> ERROR"
## [1] " Technique:  mlp_3  - Dataset:  hill-valley.arff -> Train ACC:  0.63036303630363"
## [1] " Technique:  mlp_5  - Dataset:  hill-valley.arff -> Train ACC:  0.61963696369637"
## [1] " Technique:  mlp_3  - Dataset:  ilpd.arff -> Train ACC:  0.761578044596912"
## [1] " Technique:  mlp_5  - Dataset:  ilpd.arff -> Train ACC:  0.780445969125214"
## [1] " Technique:  mlp_3  - Dataset:  kc2.arff -> Train ACC:  0.85632183908046"
## [1] " Technique:  mlp_5  - Dataset:  kc2.arff -> Train ACC:  0.877394636015326"
## [1] " Technique:  mlp_3  - Dataset:  liver-disorders.arff -> Train ACC:  0.759420289855072"
## [1] " Technique:  mlp_5  - Dataset:  liver-disorders.arff -> Train ACC:  0.814492753623188"
## [1] " Technique:  mlp_3  - Dataset:  madelon.arff -> Train ACC:  0.882307692307692"
## [1] " Technique:  mlp_5  - Dataset:  madelon.arff -> Train ACC:  0.941538461538462"
## [1] " Technique:  mlp_3  - Dataset:  mfeat-morphological.arff -> Train ACC:  0.679"
## [1] " Technique:  mlp_5  - Dataset:  mfeat-morphological.arff -> Train ACC:  0.748"
## [1] " Technique:  mlp_3  - Dataset:  one-hundred-plants-shape.arff -> Train ACC:  0.01"
## [1] " Technique:  mlp_5  - Dataset:  one-hundred-plants-shape.arff -> Train ACC:  0.088125"
## [1] " Technique:  mlp_3  - Dataset:  ozone-level-8hr.arff -> Train ACC:  0.978295185477506"
## [1] " Technique:  mlp_5  - Dataset:  ozone-level-8hr.arff -> Train ACC:  0.978295185477506"
## [1] "Technique:  mlp_3  - Dataset:  ozone.arff -> ERROR"
## [1] "Technique:  mlp_5  - Dataset:  ozone.arff -> ERROR"
## [1] " Technique:  mlp_3  - Dataset:  pc1.arff -> Train ACC:  0.947700631199279"
## [1] " Technique:  mlp_5  - Dataset:  pc1.arff -> Train ACC:  0.946798917944094"
## [1] "Technique:  mlp_3  - Dataset:  profb.arff -> ERROR"
## [1] "Technique:  mlp_5  - Dataset:  profb.arff -> ERROR"
## [1] " Technique:  mlp_3  - Dataset:  qsar-biodeg.arff -> Train ACC:  0.923222748815166"
## [1] " Technique:  mlp_5  - Dataset:  qsar-biodeg.arff -> Train ACC:  0.95260663507109"
## [1] "Technique:  mlp_3  - Dataset:  speeddating.arff -> ERROR"
## [1] "Technique:  mlp_5  - Dataset:  speeddating.arff -> ERROR"
## [1] " Technique:  mlp_3  - Dataset:  vehicle.arff -> Train ACC:  0.736406619385343"
## [1] " Technique:  mlp_5  - Dataset:  vehicle.arff -> Train ACC:  0.864066193853428"
## [1] " Technique:  mlp_3  - Dataset:  wdbc.arff -> Train ACC:  0.996485061511424"
## [1] " Technique:  mlp_5  - Dataset:  wdbc.arff -> Train ACC:  0.992970123022847"
## [1] " Technique:  mlp_3  - Dataset:  wilt.arff -> Train ACC:  0.990700557966522"
## [1] " Technique:  mlp_5  - Dataset:  wilt.arff -> Train ACC:  0.990287249431701"
# Datasets that reported "-> ERROR" in the first pass; retried here after
# removing every row that contains a missing value.
PainInTheAss_DS <- c("credit-a.arff", "hepatitis.arff", "ozone.arff")

for (id in seq_along(PainInTheAss_DS)) {
  # Seed once per dataset (not once per model) so that the random stream seen
  # by each fit is the same as when the two fits were written out one after
  # the other.
  set.seed(288)

  selected <- id
  datos <- readARFF(PainInTheAss_DS[selected])
  # The last column is used as the target, whatever it was called in the file.
  colnames(datos)[ncol(datos)] <- "Class"
  datos$Class <- as.factor(datos$Class)

  # 1st attempt: keep only the rows without any NA.
  datos <- datos[complete.cases(datos), ]

  # For caret's "mlp" method, `size` is the number of units in the hidden
  # layer (not the number of hidden layers).
  for (hidden_units in c(3, 5)) {
    technique <- paste0("mlp_", hidden_units)

    # trControl method "none": a single fit on all rows, no resampling.
    model <- tryCatch(
      caret::train(
        Class ~ .,
        data = datos, method = "mlp", preProc = c("center", "scale"),
        tuneGrid = data.frame(size = hidden_units),
        trControl = caret::trainControl(method = "none")
      ),
      error = function(e) {
        # Surface the reason on stderr instead of discarding it.
        message(
          technique, " on ", PainInTheAss_DS[selected], " failed to train: ",
          conditionMessage(e)
        )
        NULL
      }
    )

    # Predict on the training rows, without the target column. A failure here
    # is reported as ERROR as well, instead of handing a dummy logical vector
    # to confusionMatrix() and crashing the whole loop.
    preds <- if (is.null(model)) {
      NULL
    } else {
      tryCatch(
        predict(model, newdata = datos[-ncol(datos)]),
        error = function(e) {
          message(
            technique, " on ", PainInTheAss_DS[selected], " failed to predict: ",
            conditionMessage(e)
          )
          NULL
        }
      )
    }

    if (is.null(preds)) {
      print(paste("Technique: ", technique, " - Dataset: ", PainInTheAss_DS[selected], "-> ERROR"))
    } else {
      cm <- caret::confusionMatrix(preds, datos$Class)
      overall.accuracy <- cm$overall["Accuracy"]
      print(paste(" Technique: ", technique, " - Dataset: ", PainInTheAss_DS[selected], "-> Train ACC: ", overall.accuracy))
    }
  }
}
## [1] " Technique:  mlp_3  - Dataset:  credit-a.arff -> Train ACC:  0.95405819295559"
## [1] " Technique:  mlp_5  - Dataset:  credit-a.arff -> Train ACC:  0.960183767228178"
## [1] " Technique:  mlp_3  - Dataset:  hepatitis.arff -> Train ACC:  0.9875"
## [1] " Technique:  mlp_5  - Dataset:  hepatitis.arff -> Train ACC:  1"
## [1] " Technique:  mlp_3  - Dataset:  ozone.arff -> Train ACC:  0.987554112554113"
## [1] " Technique:  mlp_5  - Dataset:  ozone.arff -> Train ACC:  0.991883116883117"
# Build a cleaned copy of profb.arff for the next round of experiments.
datos <- readARFF("./profb.arff")
# Move the first column to the end; the loop that follows uses the last
# column as the class.
datos <- cbind(datos[, -1], datos[1])
# Drop columns 8 and 9 of the reordered data ("Weekday" and "OVERTIME"
# according to the original note).
datos <- datos[, -c(8, 9)]
# Allow replacing a file left behind by a previous run, so the script can be
# executed more than once.
writeARFF(datos, "./profb_CLEAN.arff", overwrite = TRUE)


# Datasets retried after two clean-up steps: removing rows with missing
# values and removing columns with zero variance.
MuchMorePainInTheAss_DS <- c("profb_CLEAN.arff", "speeddating.arff")


for (id in seq_along(MuchMorePainInTheAss_DS)) {
  # Seed once per dataset (not once per model) so that the random stream seen
  # by each fit is the same as when the two fits were written out one after
  # the other.
  set.seed(288)

  selected <- id
  datos <- readARFF(MuchMorePainInTheAss_DS[selected])

  # The last column is used as the target, whatever it was called in the file.
  colnames(datos)[ncol(datos)] <- "Class"
  datos$Class <- factor(datos$Class)

  # 1st attempt: keep only the rows without any NA.
  datos <- datos[complete.cases(datos), ]

  # 2nd attempt: remove columns whose variance is exactly 0.
  # apply() coerces the data frame to a matrix before calling var() on each
  # column; a column whose variance comes back as NA is kept, because
  # NA %in% 0 is FALSE.
  # NOTE(review): with mixed column types the coercion presumably yields a
  # character matrix, so this also decides the fate of nominal columns with
  # numeric-looking labels - confirm before replacing it with a numeric-only
  # check.
  zero_variance <- apply(datos, 2, var, na.rm = TRUE) %in% c(0)
  # drop = FALSE keeps a data frame even if a single column survives.
  datos <- datos[, !zero_variance, drop = FALSE]

  # For caret's "mlp" method, `size` is the number of units in the hidden
  # layer (not the number of hidden layers).
  for (hidden_units in c(3, 5)) {
    technique <- paste0("mlp_", hidden_units)

    # trControl method "none": a single fit on all rows, no resampling.
    model <- tryCatch(
      caret::train(
        Class ~ .,
        data = datos, method = "mlp", preProc = c("center", "scale"),
        tuneGrid = data.frame(size = hidden_units),
        trControl = caret::trainControl(method = "none")
      ),
      error = function(e) {
        # Surface the reason on stderr instead of discarding it.
        message(
          technique, " on ", MuchMorePainInTheAss_DS[selected],
          " failed to train: ", conditionMessage(e)
        )
        NULL
      }
    )

    # Predict on the training rows, without the target column. A failure here
    # is reported as ERROR as well, instead of handing a dummy logical vector
    # to confusionMatrix() and crashing the whole loop.
    preds <- if (is.null(model)) {
      NULL
    } else {
      tryCatch(
        predict(model, newdata = datos[-ncol(datos)]),
        error = function(e) {
          message(
            technique, " on ", MuchMorePainInTheAss_DS[selected],
            " failed to predict: ", conditionMessage(e)
          )
          NULL
        }
      )
    }

    if (is.null(preds)) {
      print(paste("Technique: ", technique, " - Dataset: ", MuchMorePainInTheAss_DS[selected], "-> ERROR"))
    } else {
      cm <- caret::confusionMatrix(preds, datos$Class)
      overall.accuracy <- cm$overall["Accuracy"]
      print(paste(" Technique: ", technique, " - Dataset: ", MuchMorePainInTheAss_DS[selected], "-> Train ACC: ", overall.accuracy))
    }
  }
}
## [1] " Technique:  mlp_3  - Dataset:  profb_CLEAN.arff -> Train ACC:  0.892857142857143"
## [1] " Technique:  mlp_5  - Dataset:  profb_CLEAN.arff -> Train ACC:  0.959821428571429"
## [1] " Technique:  mlp_3  - Dataset:  speeddating.arff -> Train ACC:  1"
## [1] " Technique:  mlp_5  - Dataset:  speeddating.arff -> Train ACC:  1"