R script reading summary Excel table data

It mainly uses xlsx and rJava package, opens Excel file, reads data of each table, and writes it to summary table.

The following figure shows the original data table before processing:

The following figure shows the processed data:

code implementation

Install & load the function implementation of the package. The installed.packages() function gets all installed packages to exclude installed packages. The install.packages() function installs the specified package. library() loads the incoming package.

loadLibrary <- function(pkgs) {
    uninstalledPkgs <- pkgs[!(pkgs %in% installed.packages()[, "Package"])]
    
    if (length(uninstalledPkgs)) {
        install.packages(uninstalledPkgs)
    }
    
    for (pkg in pkgs) {
        library(pkg, character.only = TRUE, quietly = TRUE)
    }
}

Custom settings. It involves table file path, summary table name, etc.

setwd("/path/to/work/dir")            # working directory
fileName = "summary_sheet_demo.xlsx"  # Processed form file
summaryName = "Summary"                  # Summary sheet name
summarySheet = FALSE                  # Object variable, ignored
startRow = 2                          # Operation start row in summary

Set up the CRAN source. The official default source may be slow, so it is important to choose a domestic source. Tsinghua CRAN source is selected here, for other references CRAN Mirrors.

options(repos = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/")

Load dependent packages. The function to install the loading package written above is used.

loadLibrary(c("xlsx", "rJava"))

Open the worksheet. loadWorkbook() opens a table file with the specified path and loads it into the workbook object.

wb <- loadWorkbook(fileName)

Traverse Query & summarize data.

# Get all forms
sheets <- getSheets(wb)

# Loop through all tables to find the tables that need to be written
for (sheet in sheets) {
    sheetName <- sheet$getSheetName()
    if (sheetName == summaryName) {
        summarySheet <- sheet
    }
}

if (summarySheet == FALSE) {
    stop(paste("surface:", summaryName, "not found"))
}

# Specify Date format (ignored here)
# options(xlsx.date.format='yyyy/MM/dd')

# Traverse all tables
for (sheet in sheets) {
    # Filter out the table to be written
    sheetName <- sheet$getSheetName()
    if (sheetName == summaryName) {
        next
    }
    
    # Get the table [number of content rows]
    rowNum <- sheet$getLastRowNum()
    print(paste("Table name:", sheetName, "In total:", rowNum, "That's ok,", sep = " "))
    
    # The read table content parameter colClasses specifies the type of each column (actually, the class / object that handles the column)
    data <- read.xlsx2(fileName, sheetName = sheetName, header = FALSE, startRow = 2, colClasses = c("character", 
        "Date", "integer", "integer", rep("numeric", 2), "integer"))
    print(data)
    
    # Write the table contents to the summary table
    addDataFrame(data, summarySheet, col.names = FALSE, row.names = FALSE, startRow = startRow)
    
    # Cumulative row number
    startRow <- startRow + rowNum
}

Write the object contents to a file to save.

saveWorkbook(wb, fileName)

Complete code

# Package load / install package
loadLibrary <- function(pkgs) {
    uninstalledPkgs <- pkgs[!(pkgs %in% installed.packages()[, "Package"])]
    
    if (length(uninstalledPkgs)) {
        install.packages(uninstalledPkgs)
    }
    
    for (pkg in pkgs) {
        library(pkg, character.only = TRUE, quietly = TRUE)
    }
}

# Custom configuration
setwd("/path/to/work/dir")            # working directory
fileName = "summary_sheet_demo.xlsx"  # Processed form file
summaryName = "Summary"                  # Summary sheet name
summarySheet = FALSE                  # Object variable, ignored
startRow = 2                          # Operation start row in summary

# Set CRAN
options(repos = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/")

# Load dependency package
loadLibrary(c("xlsx", "rJava"))

# Open Excel table
wb <- loadWorkbook(fileName)
# Get all forms
sheets <- getSheets(wb)

# Loop through all tables to find the tables that need to be written
for (sheet in sheets) {
    sheetName <- sheet$getSheetName()
    if (sheetName == summaryName) {
        summarySheet <- sheet
    }
}

if (summarySheet == FALSE) {
    stop(paste("surface:", summaryName, "not found"))
}

# Specify Date format (ignored here)
# options(xlsx.date.format='yyyy/MM/dd')

# Traverse all tables
for (sheet in sheets) {
    # Filter out the table to be written
    sheetName <- sheet$getSheetName()
    if (sheetName == summaryName) {
        next
    }
    
    # Get the table [number of content rows]
    rowNum <- sheet$getLastRowNum()
    print(paste("Table name:", sheetName, "In total:", rowNum, "That's ok,", sep = " "))
    
    # The read table content parameter colClasses specifies the type of each column (actually, the class / object that handles the column)
    data <- read.xlsx2(fileName, sheetName = sheetName, header = FALSE, startRow = 2, colClasses = c("character", 
        "Date", "integer", "integer", rep("numeric", 2), "integer"))
    print(data)
    
    # Write the table contents to the summary table
    addDataFrame(data, summarySheet, col.names = FALSE, row.names = FALSE, startRow = startRow)
    
    # Cumulative row number
    startRow <- startRow + rowNum
}

# Finally, the object content needs to be written to the file
saveWorkbook(wb, fileName)

Form attachment

summary_sheet_demo.xlsx

Original address: https://shockerli.net/post/r-script-to-read-summary-excel/

Tags: R Language Excel

Posted on Mon, 02 Dec 2019 21:25:02 -0500 by ryanwood4