#RUSIS11_lab12

library(ggplot2)

# Peek at the movies data set.
# NOTE(review): `movies` shipped with older ggplot2 releases; on current
# releases it is presumably provided by the ggplot2movies package -- confirm
# against the installed version.
head(movies)

# Keep only movies released in 1970 or later.
recent_movies <- subset(movies, year >= 1970)

# Density of ratings, one panel per release year. Built with ggplot() rather
# than qplot(), which is deprecated in current ggplot2 releases.
ggplot(recent_movies, aes(x = rating)) +
  geom_density() +
  facet_wrap(~ year)
  
# Collapse the genre indicator columns into a single label per movie.
# Flags are applied in the order listed, so when a movie has several flags
# set, the one listed last wins; movies with no flag set become 'Other'.
recent_movies$genre <- local({
  flag_columns <- c('Action', 'Animation', 'Comedy', 'Drama',
                    'Documentary', 'Romance', 'Short')
  label <- rep(NA_character_, nrow(recent_movies))
  for (flag in flag_columns) {
    label[recent_movies[[flag]] == 1] <- flag
  }
  label[is.na(label)] <- 'Other'
  label
})

# Number of movies per assigned label.
table(recent_movies$genre)

# Rating density with one row of panels per assigned genre.
ggplot(recent_movies, aes(x = rating)) +
  geom_density() +
  facet_grid(genre ~ .)

# All genres overlaid in a single panel. Transparency is a fixed setting on
# the layer, not a mapped aesthetic: mapping a constant (alpha = .2 inside
# qplot without I()) adds a meaningless "alpha" legend and lets the scale,
# not the value, decide the transparency.
ggplot(recent_movies, aes(x = rating, fill = genre, colour = genre)) +
  geom_density(alpha = 0.2)

# Same overlay, split into one panel per release year.
ggplot(recent_movies, aes(x = rating, fill = genre, colour = genre)) +
  geom_density(alpha = 0.2) +
  facet_wrap(~ year)
  
## We may want to drop movies whose genre cohort for a given year is small.
## Check one cohort: movies flagged as Animation in 1978.

# How many rows are in that cohort ...
sum(with(recent_movies, Animation == 1 & year == 1978), na.rm = TRUE)
# ... and which rows they are.
recent_movies[with(recent_movies, which(Animation == 1 & year == 1978)), ]

## Uh oh! The movies are not assigned a unique genre (the same film can carry
## more than one genre flag), so something else would have to be tried.

# Largest price and largest carat in the diamonds data; each maximum is taken
# independently, so they need not come from the same stone.
biggest <- data.frame(
  price_max = max(diamonds$price),
  carat_max = max(diamonds$carat)
)

# Diamonds above 3 carats, most expensive first. arrange() and desc() come
# from plyr, which this script only attaches further down, so they are
# namespace-qualified here to work regardless of what is attached.
big <- plyr::arrange(subset(diamonds, carat > 3), plyr::desc(price))

# Recompute a depth percentage from the raw dimensions: the mean of x and y is
# used as the diameter, and z is expressed as a percentage of it. Done in
# separate transform() calls because each step uses the column added before.
diamonds <- transform(diamonds, diameter = (x + y) / 2)
diamonds <- transform(diamonds, depth_yo = (z / diameter * 100))

# Gap between the recorded depth and the recomputed one.
diamonds <- transform(diamonds, diff = depth - depth_yo)

# Rows with the largest and the smallest gap (all ties included). Rows where
# the gap is NA/NaN are ignored.
subset(diamonds, diff %in% range(diff, na.rm = TRUE))

library(plyr)
# Load data.Rdata: the file is picked interactively. The rest of the script
# expects it to define a data frame named `data` with H and AB columns.
load(file.choose())

# If the chosen file did not define `data`, the name resolves to the
# utils::data() function; fail here with a clear message rather than with an
# obscure error inside transform().
if (!is.data.frame(data)) {
  stop("The selected file did not provide a data frame named `data`.",
       call. = FALSE)
}

# Per-row ratio of H to AB (presumably hits per at-bat, i.e. a batting
# average -- confirm against the data dictionary). Rows with AB == 0 yield
# NaN or Inf.
data <- transform(data, AVG = H / AB)

# Add a `careerAVG` column holding the mean of the `AVG` column.
#
# df: a data frame with an `AVG` column (one group's rows when called through
#     ddply).
# Returns `df` with every row's `careerAVG` set to the mean of `AVG`, missing
# values excluded. Note this is the plain mean of the per-row values, not a
# ratio of totals.
avg_maker <- function(df) {
  df$careerAVG <- mean(df$AVG, na.rm = TRUE)
  df
}
# Apply avg_maker to each player's rows and stack the results.
ddply(.data = data, .variables = "nameFull", .fun = avg_maker)

# Total of the `HR` column for one group of rows.
#
# df: a data frame with an `HR` column.
# Returns a length-one vector named `careerHRs`. Missing values are not
# removed, so any NA in `HR` makes the total NA.
careerHRs <- function(df) {
  total_home_runs <- sum(df$HR)
  c(careerHRs = total_home_runs)
}
# One row per player with the summed HR column.
ddply(.data = data, .variables = 'nameFull', .fun = careerHRs)

# Count the rows in one group.
#
# df: a data frame (one player/team group when called through ddply).
# Returns a length-one integer vector named `years` holding the row count.
player_team_years <- function(df) {
  row_count <- dim(df)[1L]
  c(years = row_count)
}
# Rows per player/team combination. The grouping variables are given twice to
# show the two spellings side by side: a character vector and plyr's .().
ddply(.data = data, .variables = c('nameFull', 'teamID'),
      .fun = player_team_years)
ddply(.data = data, .variables = .(nameFull, teamID),
      .fun = player_team_years)