Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
223 changes: 223 additions & 0 deletions CleanHospitalData.csv

Large diffs are not rendered by default.

23 changes: 23 additions & 0 deletions Q1.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
library("ggplot2")
library("lubridate")
library("dplyr")
library("tidyr")
library("DataCombine")

# Load the raw hospital data. The file name is relative to the working
# directory so the script is not tied to a single machine.
# "-", empty and whitespace-only cells are read as NA.
bufferedDataFrame <- read.csv(
  "hospitaldata.csv",
  strip.white = TRUE,
  na.strings = c("-", "", " ", "\t", "\n", NA),
  stringsAsFactors = FALSE
)
dataf <- as_tibble(bufferedDataFrame)
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(dataf)
}
glimpse(dataf)

# Age: drop every non-digit character, then convert to numeric.
dataf$Age <- as.numeric(gsub("[^0-9]", "", dataf$Age))
class(dataf$Age)
unique(dataf$Age)


# Question #1
# Remove every "." from the column names
# (a name such as "Total.Charges" would become "TotalCharges").
names(dataf) <- gsub("\\.", "", names(dataf))

# Parse Date from text shaped like "<weekday abbr>, <month name> <day>, <year>".
dataf$Date <- as.Date(strptime(dataf$Date, "%a, %B %d, %Y"))
8 changes: 8 additions & 0 deletions Q10.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Question 10
# Count the rows per patient id and keep only ids that occur more than once,
# most frequent first. The result is stored and printed.
repeated_visitors <- dataf %>%
  group_by(id) %>%
  summarise(visits = n()) %>%
  filter(visits > 1) %>%
  arrange(desc(visits)) %>%
  print()

8 changes: 8 additions & 0 deletions Q11.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Question 11
# Ids of repeated visitors: tally the rows for each id, sort by the tally in
# descending order and keep the ids seen more than once. Stored and printed.
visit_counts <- summarise(group_by(dataf, id), visits = n())
repeated_visitors <- visit_counts %>%
  arrange(desc(visits)) %>%
  filter(visits > 1) %>%
  print()

30 changes: 30 additions & 0 deletions Q12.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
library("ggplot2")
library("lubridate")
library("dplyr")
library("tidyr")
library("DataCombine")

# Load the raw hospital data from the working directory.
# "-", empty and whitespace-only cells are read as NA.
bufferedDataFrame <- read.csv(
  "hospitaldata.csv",
  strip.white = TRUE,
  na.strings = c("-", "", " ", "\t", "\n", NA),
  stringsAsFactors = FALSE
)
dataf <- as_tibble(bufferedDataFrame)
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(dataf)
}
glimpse(dataf)

# Age: drop every non-digit character, then convert to numeric.
dataf$Age <- as.numeric(gsub("[^0-9]", "", dataf$Age))
class(dataf$Age)
unique(dataf$Age)


# Question #1
# Remove every "." from the column names.
names(dataf) <- gsub("\\.", "", names(dataf))

# Parse Date from text shaped like "<weekday abbr>, <month name> <day>, <year>".
dataf$Date <- as.Date(strptime(dataf$Date, "%a, %B %d, %Y"))


# Question 12
# (id, Procedure) pairs that occur more than once, i.e. the same id recorded
# with the same non-missing Procedure on several rows.
dataf %>%
  count(id, Procedure) %>%
  filter(n > 1, !is.na(Procedure)) %>%
  select(id, Procedure)
7 changes: 7 additions & 0 deletions Q13.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Question 13
# Median Age per value of Sex, ignoring missing ages. Stored and printed.
medianAge <- dataf %>%
  group_by(Sex) %>%
  summarise(median(Age, na.rm = TRUE)) %>%
  print()

5 changes: 5 additions & 0 deletions Q14.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Question 14
# Coerce TotalCharges to numeric (values that cannot be parsed become NA),
# then add up all non-missing charges and print the total.
dataf[["TotalCharges"]] <- as.numeric(as.character(dataf[["TotalCharges"]]))
sum_of_charges <- sum(dataf[["TotalCharges"]], na.rm = TRUE)
print(sum_of_charges)

10 changes: 10 additions & 0 deletions Q15.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Question 15
# Total of TotalCharges over the rows whose Procedure is "Consultation".
# TotalCharges is first coerced to numeric; unparsable values become NA and
# are ignored by the sum.
dataf[["TotalCharges"]] <- as.numeric(as.character(dataf[["TotalCharges"]]))
consult_amount <- dataf %>%
  filter(Procedure == "Consultation") %>%
  group_by(Procedure) %>%
  summarise(sum(TotalCharges, na.rm = TRUE)) %>%
  print()


3 changes: 3 additions & 0 deletions Q16.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Question 16
# Correlation between Age and TotalCharges. The original correlated Age with
# itself, which says nothing about charges.
# TotalCharges is coerced to numeric here so the block does not depend on an
# earlier script having converted it; use = "complete.obs" drops rows where
# either value is NA instead of returning NA.
cor(dataf$Age, as.numeric(dataf$TotalCharges), use = "complete.obs")

35 changes: 35 additions & 0 deletions Q17.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#Question 17
library("ggplot2")
library("lubridate")
library("dplyr")
library("tidyr")
library("DataCombine")

# Load the raw hospital data. The file name is relative to the working
# directory so the script is not tied to a single machine.
# "-", empty and whitespace-only cells are read as NA.
bufferedDataFrame <- read.csv(
  "hospitaldata.csv",
  strip.white = TRUE,
  na.strings = c("-", "", " ", "\t", "\n", NA),
  stringsAsFactors = FALSE
)
dataf <- as_tibble(bufferedDataFrame)
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(dataf)
}
glimpse(dataf)

# Age: drop every non-digit character, then convert to numeric.
dataf$Age <- as.numeric(gsub("[^0-9]", "", dataf$Age))
class(dataf$Age)
unique(dataf$Age)


# Question #1
# Remove every "." from the column names.
names(dataf) <- gsub("\\.", "", names(dataf))

# Parse Date from text shaped like "<weekday abbr>, <month name> <day>, <year>".
dataf$Date <- as.Date(strptime(dataf$Date, "%a, %B %d, %Y"))

# Number of rows (visits) per Age, most frequent first; rows without an Age
# are left out.
visits_by_age <- dataf %>%
  filter(!is.na(Age)) %>%
  group_by(Age) %>%
  summarize(visits = n()) %>%
  arrange(desc(visits)) %>%
  print()

# Bar chart of the visit counts by age.
ggplot(data = visits_by_age, aes(x = as.numeric(Age), y = visits)) +
  geom_bar(stat = "identity", fill = "slate blue") +
  ggtitle("Visits By Age") +
  labs(x = "Age", y = "Visits")

3 changes: 3 additions & 0 deletions Q18.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Question 18
# Combined TotalCharges for the procedures "X Ray" and "Scalling".
# The column is named TotalCharges (the other scripts strip the dots from the
# column names and use that spelling); Total_Charges does not exist.
dataf$TotalCharges <- as.numeric(dataf$TotalCharges)
# %in% never yields NA, so rows with a missing Procedure are simply excluded.
sum(
  dataf$TotalCharges[dataf$Procedure %in% c("X Ray", "Scalling")],
  na.rm = TRUE
)
10 changes: 10 additions & 0 deletions Q2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Question 2
# Number of visits per day of the week.
# lubridate::wday(label = TRUE, abbr = FALSE) returns the full weekday name as
# an ordered factor, so the table and the bars follow calendar order instead
# of alphabetical order. (The original passed label=TRUE to mutate(), which
# only created a constant column named "label".)
day_visits <- dataf %>%
  mutate(Day = wday(Date, label = TRUE, abbr = FALSE)) %>%
  group_by(Day) %>%
  summarize(visits = n()) %>%
  print()

# Bar chart of the visit counts by weekday.
ggplot(day_visits, aes(x = Day, y = visits)) +
  geom_bar(stat = "identity", fill = "slateblue") +
  ggtitle("Visits per Weekday") +
  labs(x = "Day", y = "Visits")


2 changes: 2 additions & 0 deletions Q3.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Question 3
# Average of the Age column, skipping missing values.
mean(dataf[["Age"]], na.rm = TRUE)
7 changes: 7 additions & 0 deletions Q4.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Question 4
# Number of rows whose Age is at least 1 and below 13. The original used
# Age > 1, which left out one-year-olds.
# NOTE(review): assumes the intended bracket is ages 1-12 inclusive; confirm.
# The column-dropping select() steps were removed: they do not affect the
# count and failed if the column layout changed.
child <- dataf %>%
  filter(Age >= 1, Age < 13) %>%
  count() %>%
  print()

31 changes: 31 additions & 0 deletions Q5.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#Question 5
#Repeating previous code snippet

library("ggplot2")
library("lubridate")
library("dplyr")
library("tidyr")
library("DataCombine")

# Load the raw hospital data. The file name is relative to the working
# directory so the script is not tied to a single machine.
# "-", empty and whitespace-only cells are read as NA.
bufferedDataFrame <- read.csv(
  "hospitaldata.csv",
  strip.white = TRUE,
  na.strings = c("-", "", " ", "\t", "\n", NA),
  stringsAsFactors = FALSE
)
dataf <- as_tibble(bufferedDataFrame)
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(dataf)
}
glimpse(dataf)

# Age: drop every non-digit character, then convert to numeric.
dataf$Age <- as.numeric(gsub("[^0-9]", "", dataf$Age))
class(dataf$Age)
unique(dataf$Age)


# Question #1
# Remove every "." from the column names.
names(dataf) <- gsub("\\.", "", names(dataf))

# Parse Date from text shaped like "<weekday abbr>, <month name> <day>, <year>".
dataf$Date <- as.Date(strptime(dataf$Date, "%a, %B %d, %Y"))


# Normalise Sex: lower-case "f" becomes "F"; any value containing whitespace
# or "-" becomes NA (gsub() with an NA replacement sets matching elements to NA).
dataf$Sex <- gsub("f", "F", dataf$Sex)
dataf$Sex <- gsub("\\s|-", NA, dataf$Sex)

# Bar chart of row counts per Sex, coloured by Procedure. ggplot() + geom_bar()
# replaces the deprecated qplot() and draws the same chart.
# NOTE(review): the y axis shows counts; the "Procedure" axis label is kept
# from the original - confirm it is the intended label.
ggplot(dataf, aes(x = Sex, fill = Procedure)) +
  geom_bar() +
  ggtitle("Procedure vs Gender") +
  labs(x = "Gender", y = "Procedure")
29 changes: 29 additions & 0 deletions Q6.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#Question 6
#Repeating previous code snippet

library("ggplot2")
library("lubridate")
library("dplyr")
library("tidyr")
library("DataCombine")

# Load the raw hospital data. The file name is relative to the working
# directory so the script is not tied to a single machine.
# "-", empty and whitespace-only cells are read as NA.
bufferedDataFrame <- read.csv(
  "hospitaldata.csv",
  strip.white = TRUE,
  na.strings = c("-", "", " ", "\t", "\n", NA),
  stringsAsFactors = FALSE
)
dataf <- as_tibble(bufferedDataFrame)
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(dataf)
}
glimpse(dataf)

# Age: drop every non-digit character, then convert to numeric.
dataf$Age <- as.numeric(gsub("[^0-9]", "", dataf$Age))
class(dataf$Age)
unique(dataf$Age)


# Question #1
# Remove every "." from the column names.
names(dataf) <- gsub("\\.", "", names(dataf))

# Parse Date from text shaped like "<weekday abbr>, <month name> <day>, <year>".
dataf$Date <- as.Date(strptime(dataf$Date, "%a, %B %d, %Y"))


# Histogram of TotalCharges (coerced to numeric), coloured by ConsultingDoctor.
# ggplot() + geom_histogram() replaces the deprecated qplot() and draws the
# same chart.
# NOTE(review): the y axis shows counts; the "ConsultingDoctor" axis label is
# kept from the original - confirm it is the intended label.
ggplot(dataf, aes(x = as.numeric(TotalCharges), fill = ConsultingDoctor)) +
  geom_histogram() +
  ggtitle("Highest Earning") +
  labs(x = "TotalCharges", y = "ConsultingDoctor")
28 changes: 28 additions & 0 deletions Q7.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#Question 7
#Repeating previous code snippet

library("ggplot2")
library("lubridate")
library("dplyr")
library("tidyr")
library("DataCombine")

# Load the raw hospital data. The file name is relative to the working
# directory so the script is not tied to a single machine.
# "-", empty and whitespace-only cells are read as NA.
bufferedDataFrame <- read.csv(
  "hospitaldata.csv",
  strip.white = TRUE,
  na.strings = c("-", "", " ", "\t", "\n", NA),
  stringsAsFactors = FALSE
)
dataf <- as_tibble(bufferedDataFrame)
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(dataf)
}
glimpse(dataf)

# Age: drop every non-digit character, then convert to numeric.
dataf$Age <- as.numeric(gsub("[^0-9]", "", dataf$Age))
class(dataf$Age)
unique(dataf$Age)


# Question #1
# Remove every "." from the column names.
names(dataf) <- gsub("\\.", "", names(dataf))

# Parse Date from text shaped like "<weekday abbr>, <month name> <day>, <year>".
dataf$Date <- as.Date(strptime(dataf$Date, "%a, %B %d, %Y"))

# Histogram of TotalCharges (coerced to numeric), coloured by Procedure.
# ggplot() + geom_histogram() replaces the deprecated qplot() and draws the
# same chart.
# NOTE(review): the y axis shows counts; the "Procedure" axis label is kept
# from the original - confirm it is the intended label.
ggplot(dataf, aes(x = as.numeric(TotalCharges), fill = Procedure)) +
  geom_histogram() +
  ggtitle("Highest Procedure") +
  labs(x = "TotalCharges", y = "Procedure")
10 changes: 10 additions & 0 deletions Q8.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Question 8
# Number of visits per hour of the day (0-23), busiest hour first.
# Time is parsed as 12-hour clock text ("%I:%M %p") and the hour is taken
# directly from the parsed value; tz = "UTC" keeps the parse independent of
# local daylight-saving rules. Times that cannot be parsed end up in an NA group.
hour_visits <- dataf %>%
  select(Time) %>%
  mutate(Hour = hour(strptime(Time, "%I:%M %p", tz = "UTC"))) %>%
  group_by(Hour) %>%
  summarize(visits = n()) %>%
  arrange(desc(visits)) %>%
  print() # hours are on the 24-hour clock, e.g. 13 means 1 PM