-
Notifications
You must be signed in to change notification settings - Fork 26
Expand file tree
/
Copy pathR assignment 2.R
More file actions
95 lines (78 loc) · 2.86 KB
/
R assignment 2.R
File metadata and controls
95 lines (78 loc) · 2.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
library(plyr)
library(dplyr)
library(lubridate)
library(tidyr)
library(ggplot2)
"1. removed the dots in the names"
# Replace every literal "." in the column names with a space; later sections
# address columns by the spaced names (e.g. `Total Charges`).
names(hospitaldata) <- gsub(".", " ", names(hospitaldata), fixed = TRUE)
names(hospitaldata)
"2. day of the week is expected to have most visits"
# Parse the Date column as month/day/year, then turn each date into a
# labelled weekday.
df <- hospitaldata$Date %>%
  mdy() %>%
  wday(label = TRUE)
df
# Visits per weekday
table(df)
"3. What is the average age of patients"
# Coerce Age to numeric, then average while skipping missing values
nage <- as.numeric(hospitaldata[["Age"]])
mean(x = nage, na.rm = TRUE)
"4. How many children were entertained"
age <- as.numeric(hospitaldata[["Age"]])
# Children here are ages from 1 up to (but not including) 12; which() drops
# the positions where the comparison is NA.
ages <- age[which(age >= 1 & age < 12)]
length(ages)
"5. Which gender type had what kind of procedure in abundance?
i.e. Female visit mostly because of Gynae Problem"
# Cross-tabulate Procedure (rows) against Sex (columns). The two columns are
# selected explicitly instead of calling select() with no columns on grouped
# data and relying on dplyr to re-add the grouping variables.
hospitaldata %>%
  select(Procedure, Sex) %>%
  table() %>%
  View()
"6. Which Doctor is earning highest?"
# `a` stays defined for any later statement that reads it.
a <- hospitaldata$`Total Charges`
# Work on a numeric copy: drop thousands separators (if any) before coercing.
doctor_charges <- as.numeric(gsub(",", "", as.character(a), fixed = TRUE))
# A doctor's earnings are the sum over all of their visits. which.max() on the
# raw column would only find whoever issued the single largest bill.
doctor_totals <- tapply(
  doctor_charges,
  hospitaldata$`Consulting Doctor`,
  sum,
  na.rm = TRUE
)
# Name and total of the highest-earning doctor
doctor_totals[which.max(doctor_totals)]
"7. Which procedure type earns more money?"
# max() has no `filter` argument; anything passed that way is pooled into the
# maximum through `...`. Total the charges per procedure explicitly instead.
procedure_charges <- as.numeric(
  gsub(",", "", as.character(hospitaldata$`Total Charges`), fixed = TRUE)
)
procedure_totals <- tapply(
  procedure_charges,
  hospitaldata$Procedure,
  sum,
  na.rm = TRUE
)
# Name and total of the procedure type that brings in the most money
procedure_totals[which.max(procedure_totals)]
"8. Which time of the day has highest frequency of visits by hour?"
df2 <- as.character(hospitaldata$Time)
# strptime() returns NA for anything that is not a 12-hour "HH:MM AM/PM" time
# (e.g. "-" placeholders), so no separate clean-up pass is needed.
d <- strptime(df2, format = "%I:%M %p")$hour
# plyr::count is named explicitly: dplyr is attached after plyr, so a bare
# count() resolves to dplyr::count(), which does not accept a plain vector.
hourly <- plyr::count(d[!is.na(d)])
View(hourly)
# Hour of day (0-23) with the most visits, and its visit count
hourly[which.max(hourly$freq), ]
"9. Create a bracket of time by
Morning, Afternoon, Evening, Night
(6am - 12pm - Morning, 12 pm- 4 pm,
Afternoon, 4 pm- 7pm, Evening, 7pm - 6 am, Night)."
# Hour of day (0-23) per visit; times that do not parse as 12-hour
# "HH:MM AM/PM" become NA.
visit_hour <- strptime(
  as.character(hospitaldata$Time),
  format = "%I:%M %p"
)$hour
# findInterval() gives the slot each hour falls in. Slots include their start
# hour and exclude their end hour: [0,6) [6,12) [12,16) [16,19) [19,24).
slot <- findInterval(visit_hour, c(0, 6, 12, 16, 19))
# Night wraps around midnight, so the first and last slots share a label.
time_bracket <- factor(
  c("Night", "Morning", "Afternoon", "Evening", "Night")[slot],
  levels = c("Morning", "Afternoon", "Evening", "Night")
)
# Visits per bracket
table(time_bracket)
"10. How many patients are repeated visitors?"
# One row per patient id; V1 is the number of rows (visits) for that id
b <- ddply(hospitaldata, .(id), nrow)
# Repeated visitors are the ids that occur more than once
repeated <- subset(b, V1 > 1)
View(repeated)
# The answer to "how many" is the number of such ids
nrow(repeated)
"11. Give us the id of repeated visitors."
# `repeated` holds one row per id with more than one visit; list just the ids
repeated$id
"12. Which patients visited again for the same problem"
# Count rows for every (patient id, Specialty) pair
problem <- ddply(hospitaldata, .(id, Specialty), nrow)
# Keep the pairs seen more than once; which() skips NA comparisons
cv <- problem[which(problem$V1 > 1), ]
View(cv)
"13. What is the median age for Females and Males?"
# Normalise once and keep the results: upper-case Sex so "f" and "F" are
# treated alike, and coerce Age to numeric so non-numeric placeholders such as
# "-" become NA.
sex <- toupper(trimws(as.character(hospitaldata$Sex)))
age_years <- as.numeric(as.character(hospitaldata$Age))
# which() drops rows whose Sex is missing
fmed <- age_years[which(sex == "F")]
median(fmed, na.rm = TRUE)
mmed <- age_years[which(sex == "M")]
median(mmed, na.rm = TRUE)
"14. What is the total amount in balance?"
# Remove the commas, convert to numeric, and store the cleaned column back
hospitaldata$`Amount Balance` <- as.numeric(
  gsub(",", "", hospitaldata$`Amount Balance`, fixed = TRUE)
)
g <- hospitaldata$`Amount Balance`
# Total outstanding balance, ignoring missing entries
sum(g, na.rm = TRUE)
# Single-quoted so the inner double quotes do not end the string early
'15. How much money was made by Procedure Type "Consultation"?'
proct <- subset(hospitaldata, Procedure == "Consultation")
# Coerce the charges to numeric (dropping thousands separators, if any) so the
# sum does not depend on how the column was read in.
consultation_charges <- as.numeric(
  gsub(",", "", as.character(proct$`Total Charges`), fixed = TRUE)
)
sum(consultation_charges, na.rm = TRUE)
"16. Is there a relation between Age and Total Charges paid?"
# Numeric copies of the two columns being compared
d <- as.numeric(hospitaldata[["Age"]])
f <- as.numeric(hospitaldata$`Total Charges`)
# Correlation test between age (x) and total charges (y)
cor.test(d, f)
"17. Which Age group had highest number of visits?"
# Every row is one visit, so count rows per age band. Grouping by patient id
# would list repeat patients instead.
age_years <- as.numeric(as.character(hospitaldata$Age))
known_ages <- age_years[!is.na(age_years)]
# NOTE(review): 10-year bands are an assumption; the task does not define the
# age groups. Change band_width to use a different grouping.
band_width <- 10
# Upper break strictly above the oldest age so that age lands inside a band
upper <- band_width * (floor(max(known_ages, 0) / band_width) + 1)
age_group <- cut(
  known_ages,
  breaks = seq(0, upper, by = band_width),
  right = FALSE
)
z <- as.data.frame(table(age_group), responseName = "visits")
z
# Age band with the most visits
z[which.max(z$visits), ]
"18. What is the total cost earned by Procedure Type X Ray and Scalling together?"
# One membership filter covers both procedure types
xray_scaling <- subset(hospitaldata, Procedure %in% c("X Ray", "Scalling"))
# Coerce the charges to numeric (dropping thousands separators, if any) so the
# sum does not depend on how the column was read in.
xray_scaling_charges <- as.numeric(
  gsub(",", "", as.character(xray_scaling$`Total Charges`), fixed = TRUE)
)
sum(xray_scaling_charges, na.rm = TRUE)