Merging the hg38 counts of two tables and column-binding (cbind) the virus counts

gene_x · 0 likes · 492 views

Tags: R

#-- merge the hg38 counts of 38 samples and the hg38 counts of LT_K331A --
#
# Adds the two LT_K331A sample columns (LT_K331A_DI, LT_K331A_DII) to the
# existing hg38 count table, matching rows on 'gene_name', and writes the
# combined table to "gene_counts_hg38_30samples.csv".

# Load necessary libraries
library(dplyr)

# Read the first table (existing hg38 counts; must contain a 'gene_name' column)
first_table <- read.csv("merged_gene_counts.csv", stringsAsFactors = FALSE)

# Read the second table (hg38 counts of the LT_K331A samples)
second_table <- read.csv(
  "~/DATA/Data_Denise_LT_K331A_RNASeq/results_GRCh38/star_salmon/gene_name_gene_counts.csv",
  sep = ",",
  stringsAsFactors = FALSE
)

# Keep only the join key and the two sample columns from the second table
second_table_filtered <- second_table %>%
  select(gene_name, LT_K331A_DI, LT_K331A_DII)

# Collapse duplicate gene names by summing their counts, so every gene_name
# occurs once in the second table and the join cannot multiply rows of the
# first table
second_table_summarised <- second_table_filtered %>%
  group_by(gene_name) %>%
  summarise(
    LT_K331A_DI = sum(LT_K331A_DI, na.rm = TRUE),
    LT_K331A_DII = sum(LT_K331A_DII, na.rm = TRUE)
  )

# Left join: every row of the first table is kept; genes that are absent from
# the second table get NA in the two new columns
merged_table <- left_join(first_table, second_table_summarised, by = "gene_name")

# Replace NA values with 0, but only in non-text columns. A blanket
# `merged_table[is.na(merged_table)] <- 0` would also turn a missing
# gene_name (or any other text field) into the string "0".
is_text_column <- vapply(merged_table, is.character, logical(1))
for (col_index in which(!is_text_column)) {
  merged_table[[col_index]][is.na(merged_table[[col_index]])] <- 0
}

# View the result
print(head(merged_table))

# Write the result to a new file. Row names are written on purpose: the
# follow-up step reads this file back with `row.names = 1`.
write.csv(merged_table, "gene_counts_hg38_30samples.csv", row.names = TRUE)

#-- cbind the hg38 and virus counts --
#
# Reads the combined hg38 count table and the virus (JN707599) count table,
# aligns the virus sample columns to the hg38 column order, pastes the virus
# sample columns next to the hg38 columns and writes the result to
# "updated_gene_counts_hg38_30samples.csv".

# Load necessary libraries
library(dplyr)

# Read the first table; its first CSV column holds the row names that were
# written by the previous step
gene_counts_hg38_30samples <- read.csv("gene_counts_hg38_30samples.csv", stringsAsFactors = FALSE, row.names = 1)

# Read the second table
salmon_merged_gene_counts <- read.csv("~/DATA/Data_Denise_LT_K331A_RNASeq/results_JN707599/star_salmon/salmon.merged.gene_counts.csv", sep = ",", stringsAsFactors = FALSE)

# Rename the columns in the second table so the sample names match the first
# table
names(salmon_merged_gene_counts)[names(salmon_merged_gene_counts) == "LT_K331A_d8_DonorI"] <- "LT_K331A_DI"
names(salmon_merged_gene_counts)[names(salmon_merged_gene_counts) == "LT.K331A.d8.DII_re"] <- "LT_K331A_DII"

# Determine which columns are in both tables (order follows the first table)
common_columns <- intersect(names(gene_counts_hg38_30samples), names(salmon_merged_gene_counts))
if (!"gene_name" %in% common_columns) {
  stop("Both tables must contain a 'gene_name' column.", call. = FALSE)
}

# Put 'gene_name' first, followed by the shared sample columns in the order of
# the first table. Only shared columns are requested: asking for every column
# of the first table fails with "undefined columns selected" as soon as the
# first table has a column the second one lacks.
cols_order <- c("gene_name", setdiff(common_columns, "gene_name"))
salmon_merged_gene_counts <- salmon_merged_gene_counts[, cols_order, drop = FALSE]

# cbind() pairs rows purely by position and silently recycles the shorter
# table when the row counts divide evenly, so refuse mismatched tables.
# NOTE(review): the pairing is not checked against gene_name. If the virus
# table lists different genes than the hg38 table, appending rows (rbind)
# is probably what is wanted here - confirm.
if (nrow(gene_counts_hg38_30samples) != nrow(salmon_merged_gene_counts)) {
  stop(
    "Cannot cbind: the hg38 table has ", nrow(gene_counts_hg38_30samples),
    " rows but the virus table has ", nrow(salmon_merged_gene_counts), ".",
    call. = FALSE
  )
}

# Merge the tables by pasting them side by side; the first column of the
# second table (gene_name) is dropped so it is not duplicated. drop = FALSE
# keeps a data frame (and the column name) even if one sample column remains.
merged_table <- cbind(gene_counts_hg38_30samples, salmon_merged_gene_counts[, -1, drop = FALSE])

# Replace NA values with 0 in the non-text columns only, so text fields such
# as gene_name are never overwritten with "0". Columns are addressed by
# position because the pasted table contains repeated sample names.
is_text_column <- vapply(merged_table, is.character, logical(1))
for (col_index in which(!is_text_column)) {
  merged_table[[col_index]][is.na(merged_table[[col_index]])] <- 0
}

# View the result
print(head(merged_table))

# Write the result to a new file
write.csv(merged_table, "updated_gene_counts_hg38_30samples.csv", row.names = FALSE)

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum