Fink, Fiehn & Wald-Fuhrmann (2024). The role of audiovisual
congruence in aesthetic appreciation of contemporary music and visual
art. Scientific Reports, 71399. DOI: https://doi.org/10.1038/s41598-024-71399-y
If using any code or data from this notebook, please cite the
paper.
Load and preprocess data
Turn off warnings for cleaner submission file. N.B. users of this
file should turn warnings on for troubleshooting
Install required packages, if not already installed
list.of.packages <- c("tidyverse", "ggpubr", "ez", "corrplot", "car", "stats","PerformanceAnalytics", "afex", "emmeans", "mlma", "ggplot2", "ggExtra", "psych", "ggpubr", "here", "gridExtra", "ggridges")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
Load packages and data
We use the here package so that all file paths are relative.
Users should be sure not to change the directory structure of the
repository. By default, figures will be plotted in line. If you want to
save the figures to a folder on your local machine, set op = 1.
here::i_am("Fink_Kentler_analyses.Rmd")
library(here)
source(here("dependencies.R"))
data <- read.csv(here("data.csv"))
fig_path <- here("figures/") # path to save generated figures to
op = 0 # flag if want to save output plots to files (1) or plot inline (0)
Pre-process the data
Clean up variable names, add combined columns of interest, drop
unnecessary columns
# remove variables we don't need (Labvanced still outputting old vars from earlier exp. version)
data_new <- subset(data, select = - c(Block_Name, Task_Nr, aud_tend_unix, aud_ts, aud_ts_unix, condi3, copy_of_condi2, copy_of_condi, frame_end_unix, frame_rt, Name.Audio._copy_4b49, Name.Audio._copy_93a7, Name.Audio._copy_bd6f, Name.Audio._copy_c958, Name.Audio._copy_c96c, Name.Audio., subj_counter_global, subj_counter_per_group, time_delay_offset, time_measure_std, pixelDensityPerMM,unlocked))
# filter out people who have seen/heard stimuli before (some of our pilots) or who don't have normal hearing / vision
exposed = table(data$prev_exposure)
data_new <- data_new[-c(which(data_new$prev_exposure == "option_0")), ]
normSenses = table(data$Normal.Vision.) # no one without
data_new <- data_new[-c(which(data_new$completed == "no")), ] # remove incompletes
data = data_new # overwrite old data so can't accidentally use it
#create a dataframe just with the task block (for further analysis )
data_task <- data_new %>%
select(Trial_Nr, Trial_Id, correspondence_, enjoy_, Frame.Time., interestingness_, mood.congruency_, move_, subject_code, Block_Nr, group_name, exp_subject_id)
data_task <- data_task[, c("exp_subject_id", "Block_Nr", "Trial_Nr", "Trial_Id","group_name", "Frame.Time.", "correspondence_", "enjoy_", "interestingness_", "mood.congruency_", "move_" )]
# put time spent in secs (instead of ms)
data_task$Frame.Time. = data_task$Frame.Time. / 1000
# Add column for condition
# Trial_Id tells which condition it is (1 - 4 = A; 5 - 8 = V; 9 - 12 = AVI; 13 - 16 = AVR)
# with Trial_Id and the group number we can look which specific stimuli are used
data_task$Trial_Id = as.numeric(data_task$Trial_Id)
data_task$condition = NA
data_task$condition[data_task$Trial_Id <= 4] = 'Audio'
data_task$condition[data_task$Trial_Id >= 5 & data_task$Trial_Id <= 8] = 'Visual'
data_task$condition[data_task$Trial_Id >= 9 & data_task$Trial_Id <= 12] = 'AV-Intended'
data_task$condition[data_task$Trial_Id >= 13] = 'AV-Random'
# create subset data frame of task data
#just keep Block Number = 2 (these are the experimental trials)
data_task <- data_task %>%
filter(Block_Nr == 2)
# create subset data frame with just participant info
data_participant <- data_new %>%
select(exp_subject_id, group_name, age_input, gender, country_current, country_home, Modality_Preference, Ollen, overall_enjoy, "prev_exposure", realWorldLikelihood)
data_participant = na.omit(data_participant)
# calculate area scores for each participant and add to participant data
# AReA scale scoring
#Aesthetic Appreciation: Items 1, 2, 3, 4, 6, 9, 13, 14
#Intense Aesthetic Experience: Items 8, 11, 12, 13
#Creative Behavior: Items 5, 7, 10
area_data = data[data$Task_Name == 'AReA',]
area_data$area_aes_apprec = area_data$area_01 + area_data$area_02 + area_data$area_03 + area_data$area_04 + area_data$area_06 + area_data$area_09 + area_data$area_13 + area_data$area_14
area_data$area_aes_apprec = area_data$area_aes_apprec / 8 # divide by num of items
area_data$area_intense_aes_exp = area_data$area_08 + area_data$area_11 + area_data$area_12 + area_data$area_13
area_data$area_intense_aes_exp = area_data$area_intense_aes_exp / 4
area_data$area_creative_beh = area_data$area_05 + area_data$area_07 + area_data$area_10
area_data$area_creative_beh = area_data$area_creative_beh / 3
area_data$area_composite = (area_data$area_aes_apprec + area_data$area_intense_aes_exp + area_data$area_creative_beh) / 3
# add area scores to mean table (join based on exp_subj_id)
area_data = area_data[,c("exp_subject_id", "area_aes_apprec", "area_intense_aes_exp", "area_creative_beh", "area_composite")]
# merge area with participant df
data_participant = merge(x=data_participant,y=area_data,
by="exp_subject_id", all=TRUE)
# clean up modality pref data to have meaningful labels
temp = data_participant$Modality_Preference
temp = gsub("option_0", "prefer_audio", temp)
temp = gsub("option_1", "prefer_visual", temp)
temp = gsub("option_2", "prefer_audiovisual", temp)
temp = gsub("option_3", "no_preference", temp)
data_participant$Modality_Preference_cat = temp
# join participant info with trial level data for final full data frame
full_data = merge(x=data_task,y=data_participant,
by="exp_subject_id", all=TRUE)
Check participant number discrepencies between task data and demo
data
# something wrong! only 195 obs for people who saw endQs ?! (these were required questions. not sure how labvanced let that through)
# table(data_new$Task_Name)
# check what going on with 6 subs
check = subset(full_data, is.na(full_data$Ollen))
check2 = subset(data, is.na(full_data$Ollen))
nsubs = length(unique(full_data$exp_subject_id))
nsubs_missingDemo = length(unique(check$exp_subject_id))
It seems 6 participants never got the ending questions. We can
include them for all trial analyses but they will be excluded from
person-level analyses
Person-level analyses
Explore demographic data
gender_dist = table(data_participant$gender)
age_mean = mean(data_participant$age_input, na.rm = TRUE)
age_std = sd(data_participant$age_input, na.rm = TRUE)
ncountries = length(unique(data_participant$country_current))
countries = table(data_participant$country_current)
modality_pref = table(data_participant$Modality_Preference_cat)
print("Gender: man, woman, non-binary, prefer not to say")
[1] "Gender: man, woman, non-binary, prefer not to say"
print(gender_dist)
0 1 2 3
117 73 4 1
print("Age (mean, sd, min, max)")
[1] "Age (mean, sd, min, max)"
print(age_mean)
[1] 31.01538
print(age_std)
[1] 11.81973
print(min(data_participant$age_input, na.rm = TRUE))
[1] 18
print(max(data_participant$age_input, na.rm = TRUE))
[1] 68
print("Unique countries:")
[1] "Unique countries:"
print(ncountries)
[1] 23
print(countries)
Australia Belgium Canada Chile Czech Republic Ecuador
2 1 3 1 2 1
Germany Greece Hungary India Ireland Italy
1 9 4 1 2 11
Mexico Nigeria Poland Portugal South Africa Spain
12 2 23 34 15 9
Switzerland Ukraine United Kingdom United States
2 1 47 12
print("AReA (mean, sd, min, max):")
[1] "AReA (mean, sd, min, max):"
print(mean(data_participant$area_composite))
[1] 2.62576
print(sd(data_participant$area_composite))
[1] 0.7410346
print(min(data_participant$area_composite))
[1] 1.125
print(max(data_participant$area_composite))
[1] 4.763889
print("AReA AA (mean, sd, min, max):")
[1] "AReA AA (mean, sd, min, max):"
print(mean(data_participant$area_aes_apprec))
[1] 3.439055
print(sd(data_participant$area_aes_apprec))
[1] 0.69193
print(min(data_participant$area_aes_apprec))
[1] 1.125
print(max(data_participant$area_aes_apprec))
[1] 5
print("AReA IAE (mean, sd, min, max):")
[1] "AReA IAE (mean, sd, min, max):"
print(mean(data_participant$area_intense_aes_exp))
[1] 2.461443
print(sd(data_participant$area_intense_aes_exp))
[1] 0.9202953
print(min(data_participant$area_intense_aes_exp))
[1] 1
print(max(data_participant$area_intense_aes_exp))
[1] 5
print("AReA CB (mean, sd, min, max):")
[1] "AReA CB (mean, sd, min, max):"
print(mean(data_participant$area_creative_beh))
[1] 1.976783
print(sd(data_participant$area_creative_beh))
[1] 0.9174315
print(min(data_participant$area_creative_beh))
[1] 1
print(max(data_participant$area_creative_beh))
[1] 5
Plot experiment-related person-level variables
# clean up and plot ollen data
ollenScale = c("non-musician", "music-loving non-musician", "amateur musician", "serious amateur musician", "semi-professional musician", "professional musician")
ollen = table(data_participant$Ollen)
ollen = data.frame(ollen)
names(ollen) = c("Response","Participants")
ollen$Response = ollenScale[1:5] # nobody chose professional!
print(ollen)
describe(ollen$Participants)
# order Ollen properly
ollen <- within(ollen, Response <- factor(Response, levels=ollenScale))
# create barplot
# png("Kentler_ollen.png", units="in", width=8, height=5, res=300)
ggplot(ollen, aes(x=Response, y=Participants)) +
geom_bar(stat = "identity") + coord_flip() + theme_classic() + theme(text = element_text(size = 20))
# Plot area data
ggplot(data_participant, aes(x=area_composite)) +
geom_histogram(bins = 20) + ggtitle("AReA composite score")
# Plot enjoyment data
ggplot(data_participant, aes(x=overall_enjoy)) +
geom_histogram(bins = 25) + ggtitle("Overall enjoyment of the experiment")
ggplot(data_participant, aes(x=realWorldLikelihood)) +
geom_histogram(bins = 25) + ggtitle("Likelihood of seeking similar pieces in real world")
print("Enjoyment (mean, sd)")
[1] "Enjoyment (mean, sd)"
print(mean(data_participant$overall_enjoy, na.rm=T))
[1] 84.58974
print(sd(data_participant$overall_enjoy, na.rm=T))
[1] 20.67136
print("Likelihood (mean, sd)")
[1] "Likelihood (mean, sd)"
print(mean(data_participant$realWorldLikelihood, na.rm=T))
[1] 54.81026
print(sd(data_participant$realWorldLikelihood, na.rm=T))
[1] 31.5477
# plot relation between enjoyment and likelihood
p <- ggplot(data_participant, aes(x=overall_enjoy, y=realWorldLikelihood)) +
geom_point() + geom_smooth(method=lm , color="black", fill="gray", se=TRUE) +
theme(legend.position="none") + theme_classic()
# Custom marginal plots:
p2 <- ggMarginal(p, type="densigram", fill = "gray")
p2
# Self-reported modality prefs
# create barplot of modality preferences
mpref = data.frame(modality_pref)
names(mpref) = c("Response","Participants")
mpref$percent = mpref$Participants / sum(mpref$Participants)
# png("Kentler_preference.png", units="in", width=8, height=5, res=300)
ggplot(mpref, aes(x=Response, y=Participants)) +
geom_bar(stat = "identity") + coord_flip() + theme_classic() + theme(text = element_text(size = 20))
Plot relationship between all person-level vars
# create data frame for correlation
# need to convert categorical to numeric
corrdf = data_participant[ , which(names(data_participant) %in% c("Ollen", "Modality_Preference", "overall_enjoy", "realWorldLikelihood", "area_aes_apprec", "area_intense_aes_exp", "area_creative_beh", "area_composite"))]
corrdf$Modality_Preference = as.numeric(as.factor(corrdf$Modality_Preference))
corrdf$Ollen = as.numeric(as.factor(corrdf$Ollen))
chart.Correlation(corrdf, histogram=TRUE, pch=25, text.panel=labs)
Condition-level analyses
Create tables of means
Z-score ratings data within scale, participant
Calculate table of means for all participants / conditions - do this for
both z-scored and raw data in case we want to compare
# create zscore function
zscore <- function(input){
out = (input - mean(input))/sd(input)
}
# zscore data
zscored = data_task %>%
group_by(exp_subject_id) %>%
mutate(z_enjoy = zscore(enjoy_), z_interesting = zscore(interestingness_), z_mood_cong = zscore(mood.congruency_), z_moved = zscore(move_))
# NOTE zscoring correspondence needs to be done only for AV conditions
# get means of zscored data
z_means = zscored %>%
group_by(exp_subject_id, condition) %>%
summarise(mean_timespent = mean(Frame.Time.), mean_enjoy = mean(z_enjoy), mean_interesting = mean(z_interesting), mean_mood_cong = mean(z_mood_cong), mean_moved = mean(z_moved))
# get means of zscored data
# z_means_participant = z_means %>%
# group_by(exp_subject_id) %>%
# summarise(mean_timespent = mean(mean_timespent), mean_enjoy = mean(mean_enjoy), mean_interesting = mean(mean_interesting), mean_mood_cong = mean(mean_mood_cong), mean_moved = mean(mean_moved))
#
# get raw means in case want to compare to zscored data
means = data_task %>%
group_by(exp_subject_id, condition) %>%
summarise(mean_timespent = mean(Frame.Time.), mean_enjoy = mean(enjoy_), mean_interesting = mean(interestingness_), mean_mood_cong = mean(mood.congruency_), mean_moved = mean(move_), mean_corresp = mean(correspondence_))
# get raw sds in case want to compare to zscored data
sds = data_task %>%
group_by(exp_subject_id, condition) %>%
summarise(mean_timespent = sd(Frame.Time.), mean_enjoy = sd(enjoy_), mean_interesting = sd(interestingness_), mean_mood_cong = sd(mood.congruency_), mean_moved = sd(move_), mean_corresp = sd(correspondence_))
Create custom functions for analysis and plotting
We have multiple DVs on which we want to run the same type of
analysis
# Function to plot violins for each condition
# Input: dataframe, variable to plot, title for plot
plotViolin = function(data, groupvar, var, titstr, xlab, ylab){
# define colors
AVR = '#8c1aff'
AVI = '#cc99ff'
V = '#ff4d4d'
A = '#6699ff'
ourcolors = c(A, AVI, AVR, V)
# only AV conditions relevant
if (var == "z_corresp"){
ourcolors = c(AVI, AVR)
}
# Plot
var = sym(var)
groupvar = sym(groupvar)
pl = data %>%
ggplot(aes(x=!!groupvar, y=!!var, fill=!!groupvar)) +
geom_violin(legend = FALSE, trim = FALSE, adjust = 1, draw_quantiles = c(0.25, 0.75), size=.5, linetype = "dashed", color="black") + #.8
geom_violin(legend = FALSE, trim = FALSE, adjust = 1, draw_quantiles = 0.5, fill="transparent", size=.85, color="black") + #1.2
geom_jitter(size=0.4, alpha=0.6, color="gray") +
scale_fill_manual(values=ourcolors) +
#theme_classic() +
theme_ridges() +
theme(
legend.position="none",
plot.title = element_text(size=11)
) +
ggtitle(titstr) +
xlab(xlab) +
ylab(ylab)
return(pl)
}
# Function to create density ridge plots for each condition
# Input: dataframe, variable to plot, title for plot
plotRidges = function(data, groupvar, var, titstr, xlab, ylab){
# define colors
AVR = '#CCBB44'
AVI = '#228833'
V = '#CC6677'
A = '#88CCEE'
ourcolors = c(A, AVI, AVR, V)
# only AV conditions relevant
if (var == "z_corresp"){
ourcolors = c(AVI, AVR)
}
# Plot
var = sym(var)
groupvar = sym(groupvar)
pl = data %>%
ggplot(aes(x=!!var, y=!!groupvar, fill=!!groupvar)) +
# stat_density_ridges(quantile_lines=TRUE, quantiles = c(0.025, 0.975), color="black", alpha = 1) +
geom_density_ridges(inherit.aes = TRUE, jittered_points = TRUE, stat ="density_ridges",
position = position_points_jitter(width = 0.05, height = 0),
point_shape = '|', point_size = 3, point_alpha = .9, alpha = 0.7) +
geom_density_ridges(alpha = 0.85, quantile_lines=TRUE,
color="white", quantile_fun=function(var,...)mean(var)) +
scale_fill_manual(values=ourcolors) +
xlab(ylab) +
theme_ridges() + theme(legend.position = "none") +
theme(axis.title.x = element_text(hjust = 0.5)) +
theme(axis.title.y=element_blank())
return(pl)
}
# Function to run rm anova and output stats
runaov = function(id, dv, data, w){
fit = aov_ez(id,dv,data,within=w)
print(fit)
fit2 <- lsmeans(fit,specs = c(w))
contrast(fit2,method="pairwise",adjust="holm")
}
# Conditions to compare after ANOVAs
my_comparisons <- list( c("Audio", "AV-Intended"), c("Audio", "AV-Random"), c("Audio", "Visual"), c("AV-Intended", "AV-Random"), c("AV-Intended", "Visual"), c("AV-Random", "Visual"))
my_comparisons = rev(my_comparisons) # reverse order so more readable on plots
# Choose plotting style (violin or ridge) for future plots
customplot = plotRidges
hide_ns = FALSE
Time spent
How much time did participants spend experiencing the pieces in the
different conditions? Did they spend more time in one condition
vs. another?
z_means$logtime = log(z_means$mean_timespent)
p = customplot(z_means, "condition", "mean_timespent", "", "Condition", "Mean Time Spent (secs)")
plotTimeSecs = p + stat_compare_means(comparisons = my_comparisons, label = "p.signif", method = "t.test", paired = "true", na.rm=TRUE, hide.ns=hide_ns)
p = customplot(z_means, "condition", "logtime", "", "", "Mean Time Spent (log secs)")
plotTimeLog = p + stat_compare_means(comparisons = my_comparisons, label = "p.signif", method = "t.test", paired = "true", na.rm=TRUE, hide.ns=hide_ns)
z_means$exp_subject_id = as.factor(z_means$exp_subject_id)
runaov("exp_subject_id","mean_timespent", z_means, "condition")
Anova Table (Type 3 tests)
Response: mean_timespent
Effect df MSE F ges p.value
1 condition 2.00, 400.21 237.00 96.93 *** .100 <.001
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘+’ 0.1 ‘ ’ 1
Sphericity correction method: GG
contrast estimate SE df t.ratio p.value
AV.Intended - AV.Random -0.616 0.948 200 -0.650 0.5165
AV.Intended - Audio -4.908 0.800 200 -6.134 <.0001
AV.Intended - Visual 15.067 1.469 200 10.256 <.0001
AV.Random - Audio -4.292 0.942 200 -4.554 <.0001
AV.Random - Visual 15.683 1.514 200 10.361 <.0001
Audio - Visual 19.975 1.600 200 12.481 <.0001
P value adjustment: holm method for 6 tests
# print means and sds
cond_means = z_means %>%
group_by(condition) %>%
summarise(mean_timespent = mean(mean_timespent, na.rm=T))
cond_sds = z_means %>%
group_by(condition) %>%
summarise(sd_timespent = sd(mean_timespent, na.rm=T))
print(cond_means)
print(cond_sds)
plotTimeSecs
plotTimeLog
plotTimeSecs = plotTimeSecs + labs(title = "Time Spent", tag = "A") + theme(plot.title = element_text(hjust = 0.5, face='bold'), plot.tag = element_text(face='bold'))
We find a main effect of condition. Post-hoc tests show significant
differences between all conditions, except AVI and AVR. Results do not
change whether we use raw mean time spent or log of it.
Enjoyment
Did enjoyment differ by condition? (Q to participants: I enjoyed the
piece (0-100))
p = customplot(z_means, "condition", "mean_enjoy", "", "Condition", "Mean Enjoyment (z-score)")
plotEnjoy = p + stat_compare_means(comparisons = my_comparisons, label = "p.signif", method = "t.test", paired = "true", na.rm=TRUE, hide.ns=hide_ns)
runaov("exp_subject_id","mean_enjoy", z_means, "condition")
Anova Table (Type 3 tests)
Response: mean_enjoy
Effect df MSE F ges p.value
1 condition 2.76, 552.98 0.33 13.83 *** .065 <.001
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘+’ 0.1 ‘ ’ 1
Sphericity correction method: GG
contrast estimate SE df t.ratio p.value
AV.Intended - AV.Random -0.01486 0.0484 200 -0.307 1.0000
AV.Intended - Audio 0.28382 0.0524 200 5.416 <.0001
AV.Intended - Visual -0.00333 0.0544 200 -0.061 1.0000
AV.Random - Audio 0.29869 0.0546 200 5.475 <.0001
AV.Random - Visual 0.01154 0.0544 200 0.212 1.0000
Audio - Visual -0.28715 0.0654 200 -4.388 0.0001
P value adjustment: holm method for 6 tests
plotEnjoy
plotEnjoy = plotEnjoy + labs(title = "Enjoyment", tag = "C") + theme(plot.title = element_text(hjust = 0.5, face='bold'), plot.tag = element_text(face='bold'))
There is a significant main effect of condition on Enjoyment.
Post-hoc contrasts show that AVI and AVR are higher in enjoyment than A,
but not V, or each other. (V is also higher than A) So, even though
people spent more time with audio, they enjoy more with visual.
Interestingness
Did interestingness differ by condition? (Q to participants: I found
the piece interesting (0-100))
p = customplot(z_means, "condition", "mean_interesting", "", "Condition", "Mean Interestingness (z-score)")
plotInterest = p + stat_compare_means(comparisons = my_comparisons, label = "p.signif", method = "t.test", paired = "true", na.rm=TRUE, hide.ns=hide_ns)
runaov("exp_subject_id","mean_interesting", z_means, "condition")
Anova Table (Type 3 tests)
Response: mean_interesting
Effect df MSE F ges p.value
1 condition 2.65, 526.68 0.35 13.06 *** .062 <.001
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘+’ 0.1 ‘ ’ 1
Sphericity correction method: GG
contrast estimate SE df t.ratio p.value
AV.Intended - AV.Random -0.0412 0.0460 199 -0.897 0.5345
AV.Intended - Audio 0.2781 0.0530 199 5.249 <.0001
AV.Intended - Visual 0.0637 0.0573 199 1.112 0.5345
AV.Random - Audio 0.3193 0.0537 199 5.943 <.0001
AV.Random - Visual 0.1050 0.0532 199 1.973 0.1496
Audio - Visual -0.2144 0.0680 199 -3.154 0.0075
P value adjustment: holm method for 6 tests
plotInterest
plotInterest = plotInterest + labs(title = "Interestingness", tag = "D") + theme(plot.title = element_text(hjust = 0.5, face='bold'), plot.tag = element_text(face='bold'))
These interestingness results are basically identical to the ones
above for enjoyment.
Mood Congruency
Did participants feel the piece matched their current mood more in
certain conditions? (Q to participants: The piece matched my current
mood (0-100))
p = customplot(z_means, "condition", "mean_mood_cong", "", "Condition", "Mean Mood Congruency (z-score)")
plotMood = p + stat_compare_means(comparisons = my_comparisons, label = "p.signif", method = "t.test", paired = "true", na.rm=TRUE, hide.ns=hide_ns)
runaov("exp_subject_id","mean_mood_cong", z_means, "condition")
Anova Table (Type 3 tests)
Response: mean_mood_cong
Effect df MSE F ges p.value
1 condition 2.84, 564.37 0.30 3.61 * .018 .015
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘+’ 0.1 ‘ ’ 1
Sphericity correction method: GG
contrast estimate SE df t.ratio p.value
AV.Intended - AV.Random -0.0256 0.0454 199 -0.563 0.5910
AV.Intended - Audio 0.0896 0.0526 199 1.703 0.3607
AV.Intended - Visual -0.0827 0.0540 199 -1.533 0.3809
AV.Random - Audio 0.1152 0.0519 199 2.219 0.1380
AV.Random - Visual -0.0571 0.0545 199 -1.049 0.5910
Audio - Visual -0.1723 0.0609 199 -2.831 0.0307
P value adjustment: holm method for 6 tests
plotMood
plotMood = plotMood + labs(title = "Mood Congruency", tag = "E") + theme(plot.title = element_text(hjust = 0.5, face='bold'), plot.tag = element_text(face='bold'))
There is a significant main effect of condition with repect to
whether people think the piece matches their current mood. Post-hoc
contrasts show that people are significantly more likely to say the
piece matches their mood in V vs. A conditions. But no other post-hoc
contrasts were significant.
Feeling moved
Did participants feel moved by the piece more in certain conditions?
(Q to participants: The piece moved me (0-100))
p = customplot(z_means, "condition", "mean_moved", "", "Condition", "Mean Feeling Moved (z-score)")
runaov("exp_subject_id","mean_moved", z_means, "condition")
Anova Table (Type 3 tests)
Response: mean_moved
Effect df MSE F ges p.value
1 condition 2.83, 561.24 0.30 9.12 *** .044 <.001
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘+’ 0.1 ‘ ’ 1
Sphericity correction method: GG
contrast estimate SE df t.ratio p.value
AV.Intended - AV.Random -0.0265 0.0453 198 -0.584 1.0000
AV.Intended - Audio 0.1667 0.0539 198 3.092 0.0068
AV.Intended - Visual 0.1987 0.0544 198 3.651 0.0013
AV.Random - Audio 0.1932 0.0520 198 3.714 0.0013
AV.Random - Visual 0.2252 0.0535 198 4.209 0.0002
Audio - Visual 0.0320 0.0611 198 0.523 1.0000
P value adjustment: holm method for 6 tests
plotMoved = p + stat_compare_means(comparisons = my_comparisons, label = "p.signif", method = "t.test", paired = "true", na.rm=TRUE, hide.ns=hide_ns)
plotMoved
plotMoved = plotMoved + labs(title = "Feeling Moved", tag = "F") + theme(plot.title = element_text(hjust = 0.5, face='bold'), plot.tag = element_text(face='bold'))
Significant main effect of condition on feeling moved. Post-hoc tests
shows no significant differences in feeling moved between A vs. V or
between AVI vs. AVR, but all other contrasts were significant. This is
interesting and potentially a “load” effect (i.e., bimodal is generally
more moving than uni-modal)
Audio-visual correspondence
Could participants tell above chance whether the audio and visual
materials corresponded to one another? (Q to participants: To what
degree did you feel there was a correspondence between the audio and
visual sensations you were experiencing?)
# only zscore within relevant AV conditions
correspdata = data_task[data_task$condition == "AV-Intended" | data_task$condition == "AV-Random", ]
zscored_corresp = correspdata %>%
group_by(exp_subject_id) %>%
mutate(z_corresp = zscore(correspondence_))
mean_z_corresp = zscored_corresp %>%
group_by(exp_subject_id, condition) %>%
summarise(z_corresp = mean(z_corresp))
# plot and calculate stats
p <- customplot(mean_z_corresp, "condition", "z_corresp", "", "Condition", "Mean Correspondence (z-score)")
plotAVcorresp = p + stat_compare_means(comparisons = list(c("AV-Intended", "AV-Random")), label = "p.signif", method = "t.test", paired="true", label.x.npc=.5)
compare_means(z_corresp ~ condition, data = mean_z_corresp, method = "t.test", hide.ns=hide_ns)
plotAVcorresp
plotAVcorresp = plotAVcorresp + labs(title = "Audiovisual Correspondence", tag = "B") + theme(plot.title = element_text(hjust = 0.5, face='bold'), plot.tag = element_text(face='bold'))
There is a significant difference between AVI and AVR conditions in
terms of felt correspendence (i.e., particpants felt that the
intentional pairings showed greater A-V correspondence than the random
pairings). It is quite interesting then, that even though they could
tell the difference in correspondence, there were no differences in
terms of enjoyment, interestingness, etc. (see all prior plots and
stats).
Factor analysis on ratings data
Organize all data we want into one table
# merge participants means and demo data
pall = merge(data_participant, z_means, by="exp_subject_id")
# remove columns we don't need
pall = pall[ , -which(names(pall) %in% c("country_current", "country_home","prev_exposure","area_intense_aes_exp","Modality_Preference_cat", "area_creative_beh","area_aes_apprec"))]
# "gender","Modality_Preference","Ollen",, "exp_subject_id","group_name",
pall$mean_timespent = log(pall$mean_timespent)
# make sure variable right type
# Remove the 'option_' prefix and keep only the numbers for the ordered categorical columns
data$Modality_Preference <- gsub('option_', '', data$Modality_Preference)
# Convert the column to numeric
data$Modality_Preference <- as.numeric(data$Modality_Preference)
# Convert to ordered factor
pall$Modality_Preference = factor(pall$Modality_Preference)
# Do same for ollen scale
data$Ollen <- gsub('option_', '', data$Ollen)
data$Ollen <- as.numeric(data$Ollen)
pall$Ollen = factor(pall$Ollen)
# gender is unordered
pall$gender = factor(pall$gender, ordered = F)
#chart.Correlation(pall, histogram=TRUE, pch=25, text.panel=labs)
Check for suitability of FA
# organize data
forfa = z_means[ , -which(names(z_means) %in% c("mean_timespent","logtime", "condition","exp_subject_id", "factor_score"))]
# check for missing values
nna = sum(is.na(forfa)) # 16 nas in the df
# N.B. NAs have been introduced by z-scoring, due to dividing by zero (no variance)
# Since the true mean was zero, it is fair to impute with zero
forfa$mean_enjoy[is.na(forfa$mean_enjoy)] <- 0
forfa$mean_mood_cong[is.na(forfa$mean_mood_cong)] <- 0
forfa$mean_moved[is.na(forfa$mean_moved)] <- 0
forfa$mean_interesting[is.na(forfa$mean_interesting)] <- 0
# Do tests to determine suitability for FA
print("KMO")
[1] "KMO"
KMO(r=cor(forfa))
Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(forfa))
Overall MSA = 0.8
MSA for each item =
mean_enjoy mean_interesting mean_mood_cong mean_moved
0.73 0.75 0.88 0.90
print("Bartlett")
[1] "Bartlett"
cortest.bartlett(forfa)
$chisq
[1] 2127.938
$p.value
[1] 0
$df
[1] 6
print("Determinant")
[1] "Determinant"
det(cor(forfa))
[1] 0.07014754
Kaiser’s (1974) guidelines, suggest cutoff of KMO ≥ 60 for
determining the factorability of the sample data. Here was are well
above, suggesting suitability for FA. P value for Bartlett’s test is
significant. Determinant is positive, indicating FA will likely run.
Determine number of factors
fafitfree <- fa(forfa, nfactors = ncol(forfa), rotate = "none")
n_factors <- length(fafitfree$e.values)
scree <- data.frame(
Factor_n = as.factor(1:n_factors),
Eigenvalue = fafitfree$e.values)
ggplot(scree, aes(x = Factor_n, y = Eigenvalue, group = 1)) +
geom_point() + geom_line() +
xlab("Number of factors") +
ylab("Initial eigenvalue") +
labs( title = "Scree Plot",
subtitle = "(Based on the unreduced correlation matrix)")
fa.none <- fa(r=forfa,
nfactors = 1,
fm="pa", # type of factor analysis we want to use (“pa” is principal axis factoring)
max.iter=100,
rotate="varimax") # none rotation
print(fa.none)
Factor Analysis using method = pa
Call: fa(r = forfa, nfactors = 1, rotate = "varimax", max.iter = 100,
fm = "pa")
Standardized loadings (pattern matrix) based upon correlation matrix
PA1
SS loadings 2.75
Proportion Var 0.69
Mean item complexity = 1
Test of the hypothesis that 1 factor is sufficient.
df null model = 6 with the objective function = 2.66 with Chi Square = 2127.94
df of the model are 2 and the objective function was 0.07
The root mean square of the residuals (RMSR) is 0.03
The df corrected root mean square of the residuals is 0.05
The harmonic n.obs is 804 with the empirical chi square 9.47 with prob < 0.0088
The total n.obs was 804 with Likelihood Chi Square = 53.1 with prob < 2.9e-12
Tucker Lewis Index of factoring reliability = 0.928
RMSEA index = 0.178 and the 90 % confidence intervals are 0.139 0.221
BIC = 39.72
Fit based upon off diagonal values = 1
Measures of factor score adequacy
PA1
Correlation of (regression) scores with factors 0.96
Multiple R square of scores with factors 0.93
Minimum correlation of possible factor scores 0.85
Concatenate scores with other data
z_means['factor_score'] <- fa.none$scores
Run same anova analyses as above but on factor score
p = plotRidges(z_means, "condition", "factor_score", "", "Condition", "Mean Factor Score")
runaov("exp_subject_id","factor_score", z_means, "condition")
Anova Table (Type 3 tests)
Response: factor_score
Effect df MSE F ges p.value
1 condition 2.72, 543.46 1.29 11.85 *** .056 <.001
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘+’ 0.1 ‘ ’ 1
Sphericity correction method: GG
contrast estimate SE df t.ratio p.value
AV.Intended - AV.Random -0.0506 0.0916 200 -0.552 0.9428
AV.Intended - Audio 0.5237 0.1047 200 5.001 <.0001
AV.Intended - Visual 0.0774 0.1073 200 0.722 0.9428
AV.Random - Audio 0.5743 0.1058 200 5.429 <.0001
AV.Random - Visual 0.1280 0.1047 200 1.222 0.6691
Audio - Visual -0.4463 0.1300 200 -3.434 0.0029
P value adjustment: holm method for 6 tests
factorPlot = p + stat_compare_means(comparisons = my_comparisons, label = "p.signif", method = "t.test", paired="True", na.rm=TRUE)
factorPlot = factorPlot + labs(y = "Mean Factor Score (Aesthetic Experience)")
factorPlot
It basically mimics enjoyment results, as that item has highest
factor loading. But there was clearly something more interesting going
on with all of the small differences in the ANOVAs reported above
ratings = z_means[ , -which(names(z_means) %in% c("exp_subject_id","condition", "mean_corresp"))] # drop label columns and ones not using
ratings$mean_timespent = log(ratings$mean_timespent)
par(mfrow = c(3,1))
chart.Correlation(ratings, histogram=TRUE, pch=25, text.panel=labs)
fa.diagram(fa.none, digits=2, cut = .3, simple=TRUE)
#factorPlot
---
title: "R Notebook for Fink, Fiehn, Wald-Fuhrmann (2024) analyses"
output: html_notebook
authors: Lauren Fink & Hannah Fiehn
contact: finkl1@mcmaster.ca
---

This notebook accompanies the manuscript:

Fink, Fiehn & Wald-Fuhrmann (2024). The role of audiovisual congruence in aesthetic appreciation of contemporary music and visual art. *Scientific Reports*, 71399. DOI: [https://doi.org/10.1038/s41598-024-71399-y](https://doi.org/10.1038/s41598-024-71399-y)

*Pre-print available at:* [osf.io/hjgc5/](osf.io/hjgc5/)

If using any code or data from this notebook, please cite the paper. 

Additional code and information accompanying this R notebook are available in the associated GitHub repository: [https://github.com/lkfink/Kentler_MIM_Behavior](https://github.com/lkfink/Kentler_MIM_Behavior)

# Load and preprocess data
Turn off warnings for cleaner submission file. N.B. users of this file should turn warnings on for troubleshooting
```{r setup, include=FALSE} 
knitr::opts_chunk$set(warning = FALSE, message = FALSE) 
```

### Install required packages, if not already installed
```{r}
list.of.packages <- c("tidyverse", "ggpubr", "ez", "corrplot", "car", "stats","PerformanceAnalytics", "afex", "emmeans", "mlma", "ggplot2", "ggExtra", "psych", "ggpubr", "here", "gridExtra", "ggridges")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages) 
```

### Load packages and data
We use the *here* package so that all file paths are relative. Users should be sure not to change the directory structure of the repository. 
By default, figures will be plotted in line. If you want to save the figures to a folder on your local machine, set op = 1. 
```{r}
here::i_am("Fink_Kentler_analyses.Rmd")
library(here)
source(here("dependencies.R"))
data <- read.csv(here("data.csv")) 
fig_path <- here("figures/") # path to save generated figures to
op = 0 # flag if want to save output plots to files (1) or plot inline (0)
```

### Pre-process the data
Clean up variable names, add combined columns of interest, drop unnecessary columns
```{r}
# remove variables we don't need (Labvanced still outputting old vars from earlier exp. version)
data_new <- subset(data, select = - c(Block_Name, Task_Nr, aud_tend_unix, aud_ts, aud_ts_unix, condi3, copy_of_condi2, copy_of_condi, frame_end_unix, frame_rt, Name.Audio._copy_4b49, Name.Audio._copy_93a7, Name.Audio._copy_bd6f, Name.Audio._copy_c958, Name.Audio._copy_c96c, Name.Audio., subj_counter_global, subj_counter_per_group, time_delay_offset, time_measure_std, pixelDensityPerMM,unlocked))

# filter out people who have seen/heard stimuli before (some of our pilots) or who don't have normal hearing / vision
exposed = table(data$prev_exposure)
data_new <- data_new[-c(which(data_new$prev_exposure == "option_0")), ]
normSenses = table(data$Normal.Vision.) # no one without
data_new <- data_new[-c(which(data_new$completed == "no")), ] # remove incompletes
data = data_new # overwrite old data so can't accidentally use it

#create a dataframe just with the task block (for further analysis )
data_task <- data_new %>% 
  select(Trial_Nr, Trial_Id, correspondence_, enjoy_, Frame.Time., interestingness_, mood.congruency_, move_, subject_code, Block_Nr, group_name, exp_subject_id)

data_task <- data_task[, c("exp_subject_id", "Block_Nr", "Trial_Nr", "Trial_Id","group_name", "Frame.Time.", "correspondence_", "enjoy_", "interestingness_", "mood.congruency_", "move_" )]

# put time spent in secs (instead of ms)
data_task$Frame.Time. = data_task$Frame.Time. / 1000

# Add column for condition 
# Trial_Id tells which condition it is (1 - 4 = A; 5 - 8 = V; 9 - 12 = AVI; 13 - 16 = AVR)
# with Trial_Id and the group number we can look which specific stimuli are used 
data_task$Trial_Id = as.numeric(data_task$Trial_Id)
data_task$condition = NA
data_task$condition[data_task$Trial_Id <= 4] = 'Audio' 
data_task$condition[data_task$Trial_Id >= 5 & data_task$Trial_Id <= 8] = 'Visual' 
data_task$condition[data_task$Trial_Id >= 9 & data_task$Trial_Id <= 12] = 'AV-Intended' 
data_task$condition[data_task$Trial_Id >= 13] = 'AV-Random' 

# create subset data frame of task data
#just keep Block Number = 2 (these are the experimental trials)
data_task <- data_task %>%
  filter(Block_Nr == 2)

# create subset data frame with just participant info
data_participant <- data_new %>%
  select(exp_subject_id, group_name, age_input, gender, country_current, country_home, Modality_Preference, Ollen, overall_enjoy, "prev_exposure", realWorldLikelihood)

data_participant = na.omit(data_participant)

# calculate area scores for each participant and add to participant data
# AReA scale scoring
#Aesthetic Appreciation: Items 1, 2, 3, 4, 6, 9, 13, 14
#Intense Aesthetic Experience: Items 8, 11, 12, 13
#Creative Behavior: Items 5, 7, 10
area_data = data[data$Task_Name == 'AReA',]
area_data$area_aes_apprec = area_data$area_01 + area_data$area_02 + area_data$area_03 + area_data$area_04 + area_data$area_06 + area_data$area_09 + area_data$area_13 + area_data$area_14
area_data$area_aes_apprec = area_data$area_aes_apprec / 8 # divide by num of items
area_data$area_intense_aes_exp = area_data$area_08 + area_data$area_11 + area_data$area_12 + area_data$area_13
area_data$area_intense_aes_exp = area_data$area_intense_aes_exp / 4
area_data$area_creative_beh = area_data$area_05 + area_data$area_07 + area_data$area_10
area_data$area_creative_beh = area_data$area_creative_beh / 3
area_data$area_composite = (area_data$area_aes_apprec + area_data$area_intense_aes_exp + area_data$area_creative_beh) / 3

# add area scores to mean table (join based on exp_subj_id)
area_data = area_data[,c("exp_subject_id", "area_aes_apprec", "area_intense_aes_exp", "area_creative_beh", "area_composite")]

# merge area with participant df
data_participant = merge(x=data_participant,y=area_data, 
                         by="exp_subject_id", all=TRUE)

# clean up modality pref data to have meaningful labels
temp = data_participant$Modality_Preference
temp = gsub("option_0", "prefer_audio", temp)
temp = gsub("option_1", "prefer_visual", temp)
temp = gsub("option_2", "prefer_audiovisual", temp)
temp = gsub("option_3", "no_preference", temp)
data_participant$Modality_Preference_cat = temp

# join participant info with trial level data for final full data frame
full_data = merge(x=data_task,y=data_participant, 
                  by="exp_subject_id", all=TRUE)
```

Check participant number discrepencies between task data and demo data
```{r}
# something wrong! only 195 obs for people who saw endQs ?! (these were required questions. not sure how labvanced let that through)
# table(data_new$Task_Name)

# check what going on with 6 subs
check = subset(full_data, is.na(full_data$Ollen))
check2 = subset(data, is.na(full_data$Ollen))

nsubs = length(unique(full_data$exp_subject_id))
nsubs_missingDemo = length(unique(check$exp_subject_id))
```
It seems 6 participants never got the ending questions. We can include them for all trial analyses but they will be excluded from person-level analyses

# Person-level analyses

### Explore demographic data
```{r}
gender_dist = table(data_participant$gender)
age_mean = mean(data_participant$age_input, na.rm = TRUE)
age_std = sd(data_participant$age_input, na.rm = TRUE)
ncountries = length(unique(data_participant$country_current))
countries = table(data_participant$country_current)
modality_pref = table(data_participant$Modality_Preference_cat) 

print("Gender: man, woman, non-binary, prefer not to say")
print(gender_dist)

print("Age (mean, sd, min, max)")
print(age_mean)
print(age_std)
print(min(data_participant$age_input, na.rm = TRUE))
print(max(data_participant$age_input, na.rm = TRUE))

print("Unique countries:")
print(ncountries)
print(countries)

print("AReA (mean, sd, min, max):")
print(mean(data_participant$area_composite))
print(sd(data_participant$area_composite))
print(min(data_participant$area_composite))
print(max(data_participant$area_composite))

print("AReA AA (mean, sd, min, max):")
print(mean(data_participant$area_aes_apprec))
print(sd(data_participant$area_aes_apprec))
print(min(data_participant$area_aes_apprec))
print(max(data_participant$area_aes_apprec))

print("AReA IAE (mean, sd, min, max):")
print(mean(data_participant$area_intense_aes_exp))
print(sd(data_participant$area_intense_aes_exp))
print(min(data_participant$area_intense_aes_exp))
print(max(data_participant$area_intense_aes_exp))

print("AReA CB (mean, sd, min, max):")
print(mean(data_participant$area_creative_beh))
print(sd(data_participant$area_creative_beh))
print(min(data_participant$area_creative_beh))
print(max(data_participant$area_creative_beh))
```

### Plot experiment-related person-level variables
```{r}
# clean up and plot ollen data
ollenScale = c("non-musician", "music-loving non-musician", "amateur musician", "serious amateur musician", "semi-professional musician", "professional musician")

ollen = table(data_participant$Ollen)
ollen = data.frame(ollen)
names(ollen) = c("Response","Participants")
ollen$Response = ollenScale[1:5] # nobody chose professional!

print(ollen)
describe(ollen$Participants)

# order Ollen properly
ollen <- within(ollen, Response <- factor(Response, levels=ollenScale))

# create barplot
# png("Kentler_ollen.png", units="in", width=8, height=5, res=300)
ggplot(ollen, aes(x=Response, y=Participants)) + 
  geom_bar(stat = "identity") + coord_flip() + theme_classic() + theme(text = element_text(size = 20))

# Plot area data
ggplot(data_participant, aes(x=area_composite)) + 
  geom_histogram(bins = 20) + ggtitle("AReA composite score")


# Plot enjoyment data
ggplot(data_participant, aes(x=overall_enjoy)) + 
  geom_histogram(bins = 25) + ggtitle("Overall enjoyment of the experiment")

ggplot(data_participant, aes(x=realWorldLikelihood)) + 
  geom_histogram(bins = 25) + ggtitle("Likelihood of seeking similar pieces in real world")

print("Enjoyment (mean, sd)")
print(mean(data_participant$overall_enjoy, na.rm=T))
print(sd(data_participant$overall_enjoy, na.rm=T))

print("Likelihood (mean, sd)")
print(mean(data_participant$realWorldLikelihood, na.rm=T))
print(sd(data_participant$realWorldLikelihood, na.rm=T))

# plot relation between enjoyment and likelihood
p <- ggplot(data_participant, aes(x=overall_enjoy, y=realWorldLikelihood)) +
  geom_point() + geom_smooth(method=lm , color="black", fill="gray", se=TRUE) +
  theme(legend.position="none") + theme_classic()

# Custom marginal plots:
p2 <- ggMarginal(p, type="densigram", fill = "gray") 
p2 

# Self-reported modality prefs
# create barplot of modality preferences
mpref = data.frame(modality_pref)
names(mpref) = c("Response","Participants")
mpref$percent = mpref$Participants / sum(mpref$Participants)


# png("Kentler_preference.png", units="in", width=8, height=5, res=300) 
ggplot(mpref, aes(x=Response, y=Participants)) + 
  geom_bar(stat = "identity") + coord_flip() + theme_classic() + theme(text = element_text(size = 20))
```
### Plot relationship between all person-level vars
```{r}
# create data frame for correlation
# need to convert categorical to numeric 
corrdf = data_participant[ , which(names(data_participant) %in% c("Ollen", "Modality_Preference", "overall_enjoy", "realWorldLikelihood", "area_aes_apprec", "area_intense_aes_exp", "area_creative_beh", "area_composite"))] 

corrdf$Modality_Preference = as.numeric(as.factor(corrdf$Modality_Preference))
corrdf$Ollen = as.numeric(as.factor(corrdf$Ollen))

chart.Correlation(corrdf, histogram=TRUE, pch=25, text.panel=labs)
```

# Condition-level analyses

### Create tables of means
Z-score ratings data within scale, participant  
Calculate table of means for all participants / conditions
- do this for both z-scored and raw data in case we want to compare
```{r}
# create zscore function
zscore <- function(input){
  out = (input - mean(input))/sd(input)
}

# zscore data
zscored = data_task %>%
  group_by(exp_subject_id) %>%
  mutate(z_enjoy = zscore(enjoy_), z_interesting = zscore(interestingness_), z_mood_cong = zscore(mood.congruency_), z_moved = zscore(move_))
# NOTE zscoring correspondence needs to be done only for AV conditions

# get means of zscored data
z_means = zscored %>%
  group_by(exp_subject_id, condition) %>%
  summarise(mean_timespent = mean(Frame.Time.), mean_enjoy = mean(z_enjoy), mean_interesting = mean(z_interesting), mean_mood_cong = mean(z_mood_cong), mean_moved = mean(z_moved))


# get means of zscored data
# z_means_participant = z_means %>%
#   group_by(exp_subject_id) %>%
#   summarise(mean_timespent = mean(mean_timespent), mean_enjoy = mean(mean_enjoy), mean_interesting = mean(mean_interesting), mean_mood_cong = mean(mean_mood_cong), mean_moved = mean(mean_moved))
# 

# get raw means in case want to compare to zscored data
means = data_task %>%
  group_by(exp_subject_id, condition) %>%
  summarise(mean_timespent = mean(Frame.Time.), mean_enjoy = mean(enjoy_), mean_interesting = mean(interestingness_), mean_mood_cong = mean(mood.congruency_), mean_moved = mean(move_), mean_corresp = mean(correspondence_))

# get raw sds in case want to compare to zscored data
sds = data_task %>%
  group_by(exp_subject_id, condition) %>%
  summarise(mean_timespent = sd(Frame.Time.), mean_enjoy = sd(enjoy_), mean_interesting = sd(interestingness_), mean_mood_cong = sd(mood.congruency_), mean_moved = sd(move_), mean_corresp = sd(correspondence_))

```


### Create custom functions for analysis and plotting
We have multiple DVs on which we want to run the same type of analysis
```{r}
# Function to plot violins for each condition
# Input: dataframe, variable to plot, title for plot
plotViolin = function(data, groupvar, var, titstr, xlab, ylab){
  
  # define colors
  AVR = '#8c1aff'
  AVI = '#cc99ff'
  V = '#ff4d4d'
  A = '#6699ff'
  ourcolors = c(A, AVI, AVR, V)
  
  # only AV conditions relevant
  if (var == "z_corresp"){
    ourcolors = c(AVI, AVR) 
  }
  
  # Plot
  var = sym(var)
  groupvar = sym(groupvar)

  pl = data %>%
    ggplot(aes(x=!!groupvar, y=!!var, fill=!!groupvar)) +
    geom_violin(legend = FALSE, trim = FALSE, adjust = 1, draw_quantiles = c(0.25, 0.75), size=.5, linetype = "dashed", color="black") + #.8 
    geom_violin(legend = FALSE, trim = FALSE, adjust = 1, draw_quantiles = 0.5, fill="transparent", size=.85, color="black") + #1.2
    
    geom_jitter(size=0.4, alpha=0.6, color="gray") +
    scale_fill_manual(values=ourcolors) +
    #theme_classic() +
    theme_ridges() +
    theme(
      legend.position="none",
      plot.title = element_text(size=11)
    ) +
    ggtitle(titstr) +
    xlab(xlab) +
    ylab(ylab)
  
  return(pl)
  
}



# Function to create density ridge plots for each condition
# Input: dataframe, variable to plot, title for plot
plotRidges = function(data, groupvar, var, titstr, xlab, ylab){
  
  # define colors
  AVR = '#CCBB44'
  AVI = '#228833'
  V = '#CC6677'
  A = '#88CCEE'
  ourcolors = c(A, AVI, AVR, V)
  
  # only AV conditions relevant
  if (var == "z_corresp"){
    ourcolors = c(AVI, AVR) 
  }
  
  # Plot
  var = sym(var)
  groupvar = sym(groupvar)

  pl = data %>%
    ggplot(aes(x=!!var, y=!!groupvar, fill=!!groupvar)) +
    
   # stat_density_ridges(quantile_lines=TRUE, quantiles = c(0.025, 0.975), color="black", alpha = 1) +
    
    geom_density_ridges(inherit.aes = TRUE, jittered_points = TRUE, stat ="density_ridges",
      position = position_points_jitter(width = 0.05, height = 0),
      point_shape = '|', point_size = 3, point_alpha = .9, alpha = 0.7) +
    
    geom_density_ridges(alpha = 0.85, quantile_lines=TRUE,
                        color="white", quantile_fun=function(var,...)mean(var)) +
    
    scale_fill_manual(values=ourcolors) +
    xlab(ylab) +
    theme_ridges() + theme(legend.position = "none") +
    theme(axis.title.x = element_text(hjust = 0.5)) + 
          theme(axis.title.y=element_blank())
  
  return(pl)
  
}


# Function to run rm anova and output stats
runaov = function(id, dv, data, w){
  fit = aov_ez(id,dv,data,within=w)
  print(fit)
  fit2 <- lsmeans(fit,specs = c(w))
  contrast(fit2,method="pairwise",adjust="holm")
}

# Conditions to compare after ANOVAs
my_comparisons <- list( c("Audio", "AV-Intended"), c("Audio", "AV-Random"), c("Audio", "Visual"), c("AV-Intended", "AV-Random"),  c("AV-Intended", "Visual"), c("AV-Random", "Visual"))
my_comparisons = rev(my_comparisons) # reverse order so more readable on plots

# Choose plotting style (violin or ridge) for future plots
customplot = plotRidges
hide_ns = FALSE
```

## Time spent
How much time did participants spend experiencing the pieces in the different conditions? Did they spend more time in one condition vs. another?
```{r}
z_means$logtime = log(z_means$mean_timespent)
p = customplot(z_means, "condition", "mean_timespent", "", "Condition", "Mean Time Spent (secs)")
plotTimeSecs = p + stat_compare_means(comparisons = my_comparisons, label = "p.signif", method = "t.test", paired = "true", na.rm=TRUE, hide.ns=hide_ns)

p = customplot(z_means, "condition", "logtime", "", "", "Mean Time Spent (log secs)")
plotTimeLog = p + stat_compare_means(comparisons = my_comparisons, label = "p.signif", method = "t.test", paired = "true", na.rm=TRUE, hide.ns=hide_ns)
z_means$exp_subject_id = as.factor(z_means$exp_subject_id)

runaov("exp_subject_id","mean_timespent", z_means, "condition")

# print means and sds
cond_means = z_means %>%
  group_by(condition) %>%
  summarise(mean_timespent = mean(mean_timespent, na.rm=T))   

cond_sds = z_means %>%
  group_by(condition) %>%
  summarise(sd_timespent = sd(mean_timespent, na.rm=T))

print(cond_means)
print(cond_sds)

plotTimeSecs
plotTimeLog

plotTimeSecs = plotTimeSecs + labs(title = "Time Spent", tag = "A") + theme(plot.title = element_text(hjust = 0.5, face='bold'), plot.tag = element_text(face='bold'))
```

We find a main effect of condition. Post-hoc tests show significant differences between all conditions, except AVI and AVR. Results do not change whether we use raw mean time spent or log of it. 



## Enjoyment
Did enjoyment differ by condition? (Q to participants: I enjoyed the piece (0-100))
```{r}
p = customplot(z_means, "condition", "mean_enjoy", "", "Condition", "Mean Enjoyment (z-score)")
plotEnjoy = p + stat_compare_means(comparisons = my_comparisons, label = "p.signif", method = "t.test", paired = "true", na.rm=TRUE, hide.ns=hide_ns)
runaov("exp_subject_id","mean_enjoy", z_means, "condition")
plotEnjoy
plotEnjoy = plotEnjoy + labs(title = "Enjoyment", tag = "C") + theme(plot.title = element_text(hjust = 0.5, face='bold'), plot.tag = element_text(face='bold'))
```

There is a significant main effect of condition on Enjoyment. Post-hoc contrasts show that AVI and AVR are higher in enjoyment than A, but not V, or each other. (V is also higher than A)
So, even though people spent more time with audio, they enjoy more with visual. 

## Interestingness
Did interestingness differ by condition? (Q to participants: I found the piece interesting (0-100))
```{r}
p = customplot(z_means, "condition", "mean_interesting", "", "Condition", "Mean Interestingness (z-score)")
plotInterest = p + stat_compare_means(comparisons = my_comparisons, label = "p.signif", method = "t.test", paired = "true", na.rm=TRUE, hide.ns=hide_ns)
runaov("exp_subject_id","mean_interesting", z_means, "condition")
plotInterest
plotInterest = plotInterest + labs(title = "Interestingness", tag = "D") + theme(plot.title = element_text(hjust = 0.5, face='bold'), plot.tag = element_text(face='bold'))

```

These interestingness results are basically identical to the ones above for enjoyment. 

## Mood Congruency
Did participants feel the piece matched their current mood more in certain conditions? (Q to participants: The piece matched my current mood (0-100))
```{r}
p = customplot(z_means, "condition", "mean_mood_cong", "", "Condition", "Mean Mood Congruency (z-score)")
plotMood = p + stat_compare_means(comparisons = my_comparisons, label = "p.signif", method = "t.test", paired = "true", na.rm=TRUE, hide.ns=hide_ns)
runaov("exp_subject_id","mean_mood_cong", z_means, "condition")
plotMood
plotMood = plotMood + labs(title = "Mood Congruency", tag = "E") + theme(plot.title = element_text(hjust = 0.5, face='bold'), plot.tag = element_text(face='bold'))
```

There is a significant main effect of condition with repect to whether people think the piece matches their current mood. Post-hoc contrasts show that people are significantly more likely to say the piece matches their mood in V vs. A conditions. But no other post-hoc contrasts were significant.

## Feeling moved
Did participants feel moved by the piece more in certain conditions? (Q to participants: The piece moved me (0-100))
```{r}
p = customplot(z_means, "condition", "mean_moved", "", "Condition", "Mean Feeling Moved (z-score)")
runaov("exp_subject_id","mean_moved", z_means, "condition")
plotMoved = p + stat_compare_means(comparisons = my_comparisons, label = "p.signif", method = "t.test", paired = "true", na.rm=TRUE, hide.ns=hide_ns)
plotMoved
plotMoved = plotMoved + labs(title = "Feeling Moved", tag = "F") + theme(plot.title = element_text(hjust = 0.5, face='bold'), plot.tag = element_text(face='bold'))
```

Significant main effect of condition on feeling moved. Post-hoc tests shows no significant differences in feeling moved between A vs. V or between AVI vs. AVR, but all other contrasts were significant. This is interesting and potentially a "load" effect (i.e., bimodal is generally more moving than uni-modal)

## Audio-visual correspondence
Could participants tell above chance whether the audio and visual materials corresponded to one another? (Q to participants: To what degree did you feel there was a correspondence between the audio and visual sensations you were experiencing?)
```{r}
# only zscore within relevant AV conditions
correspdata = data_task[data_task$condition == "AV-Intended" | data_task$condition == "AV-Random", ]
zscored_corresp = correspdata %>%
  group_by(exp_subject_id) %>%
  mutate(z_corresp = zscore(correspondence_)) 

mean_z_corresp = zscored_corresp %>%
  group_by(exp_subject_id, condition) %>%
  summarise(z_corresp = mean(z_corresp))

# plot and calculate stats
p <- customplot(mean_z_corresp, "condition", "z_corresp", "", "Condition", "Mean Correspondence (z-score)")
plotAVcorresp = p + stat_compare_means(comparisons = list(c("AV-Intended", "AV-Random")), label = "p.signif", method = "t.test", paired="true", label.x.npc=.5)
compare_means(z_corresp ~ condition,  data = mean_z_corresp, method = "t.test", hide.ns=hide_ns)
plotAVcorresp
plotAVcorresp = plotAVcorresp + labs(title = "Audiovisual Correspondence", tag = "B") + theme(plot.title = element_text(hjust = 0.5, face='bold'), plot.tag = element_text(face='bold'))
 
```

There is a significant difference between AVI and AVR conditions in terms of felt correspendence (i.e., particpants felt that the intentional pairings showed greater A-V correspondence than the random pairings). It is quite interesting then, that even though they could tell the difference in correspondence, there were no differences in terms of enjoyment, interestingness, etc. (see all prior plots and stats).


# Figure 1 for paper
Now create one figure which tiles all of our previous figures
```{r}
if (op) {tiff(paste(fig_path, "combined_DVs_ridge_r1.tiff", sep=""), units="in", width=9, height=11, res=300)}
plotAll = grid.arrange(plotTimeSecs, plotAVcorresp, plotEnjoy, plotInterest, plotMood, plotMoved, ncol = 2)
plotAll

ggsave("combined_DVs_ridge_r1.svg", plotAll, width=9, height=11, units = "in")

```

NOTE: In final, publication-ready figure, I manually removed non-significant comparison brackets, as well as brackets with p values that did not withstand multiple comparisons. 






# Supplementary Analysis (not included in the paper)

## How do our different measures correlate with one another?
Note, 4 observations per condition for every participant. Can avg by participant.
Construct 4 separate correlation matrices and see if differences in structure based on modality?

### Overall correlation with all conditions included
```{r}
# remove ratio diff and other columns we don't need 
ratings = z_means[ , -which(names(z_means) %in% c("exp_subject_id","condition", "mean_corresp", "logtime"))] # drop label columns and ones not using
ratings$mean_timespent = log(ratings$mean_timespent) # TODO check if want to do this or not

# Capture the plot so we can use it again later
chart.Correlation(ratings, histogram=TRUE, pch=25, text.panel=labs)
```

### Correlations by condition
#### Audio
```{r}
# remove ratio diff and other columns we don't need 
ratings = subset(z_means, condition == 'Audio')
ratings = ratings[ , -which(names(ratings) %in% c("exp_subject_id","condition", "mean_corresp", "logtime"))] # drop label columns and ones not using
ratings$mean_timespent = log(ratings$mean_timespent)

chart.Correlation(ratings, histogram=TRUE, pch=25, text.panel=labs)
```

#### Visual
```{r}
# remove ratio diff and other columns we don't need 
ratings = subset(z_means, condition == 'Visual')
ratings = ratings[ , -which(names(ratings) %in% c("exp_subject_id","condition", "mean_corresp", "logtime"))] # drop label columns and ones not using
ratings$mean_timespent = log(ratings$mean_timespent)

chart.Correlation(ratings, histogram=TRUE, pch=25, text.panel=labs)
```

#### AV-Intended
```{r}
# remove ratio diff and other columns we don't need 
ratings = subset(z_means, condition == 'AV-Intended')
corresp = subset(mean_z_corresp, condition == 'AV-Intended')
#ratings$mean_corresp = mean_z_corresp
ratings <- merge(x=ratings,y=corresp, 
                by="exp_subject_id")
ratings <- ratings %>% # rename
  rename(mean_corresp = z_corresp)
ratings = ratings[ , -which(names(ratings) %in% c("exp_subject_id","condition", "condition.x", "condition.y", "logtime"))] 
ratings$mean_timespent = log(ratings$mean_timespent)

chart.Correlation(ratings, histogram=TRUE, pch=25, text.panel=labs)
```


#### AV-random
```{r}
# remove ratio diff and other columns we don't need 
ratings = subset(z_means, condition == 'AV-Random')
corresp = subset(mean_z_corresp, condition == 'AV-Random')
ratings <- merge(x=ratings,y=corresp, 
                by="exp_subject_id")
ratings <- ratings %>% # rename
  rename(mean_corresp = z_corresp)
ratings = ratings[ , -which(names(ratings) %in% c("exp_subject_id","condition", "condition.x", "condition.y", "logtime"))] 
ratings$mean_timespent = log(ratings$mean_timespent)


chart.Correlation(ratings, histogram=TRUE, pch=25, text.panel=labs)
```
It seems our ratings scales and time spent measure show different relationships with each other, dependent on the condition. On the one hand this speaks against something like factor analysis to reduce dimensionality without taking condition into account. On the other hand, the rating scales consistently correlate with each other across conditions. Therefore, we could probably reduce the ratings, as we suggested in our pre-registration. Note that the relationship of the ratings with time spent does not change when using log time spent. 



# Factor analysis on ratings data

## Organize all data we want into one table
```{r}
# merge participants means and demo data
pall = merge(data_participant, z_means, by="exp_subject_id")

# remove columns we don't need 
pall = pall[ , -which(names(pall) %in% c("country_current", "country_home","prev_exposure","area_intense_aes_exp","Modality_Preference_cat", "area_creative_beh","area_aes_apprec"))]  
# "gender","Modality_Preference","Ollen",, "exp_subject_id","group_name",
pall$mean_timespent = log(pall$mean_timespent)

# make sure variable right type
# Remove the 'option_' prefix and keep only the numbers for the ordered categorical columns
data$Modality_Preference <- gsub('option_', '', data$Modality_Preference)
# Convert the column to numeric
data$Modality_Preference <- as.numeric(data$Modality_Preference)
# Convert to ordered factor
pall$Modality_Preference = factor(pall$Modality_Preference)

# Do same for ollen scale
data$Ollen <- gsub('option_', '', data$Ollen)
data$Ollen <- as.numeric(data$Ollen)
pall$Ollen = factor(pall$Ollen)

# gender is unordered
pall$gender = factor(pall$gender, ordered = F)

#chart.Correlation(pall, histogram=TRUE, pch=25, text.panel=labs)
```



## Check for suitability of FA
```{r}
# organize data
forfa = z_means[ , -which(names(z_means) %in% c("mean_timespent","logtime", "condition","exp_subject_id", "factor_score"))] 

# check for missing values
nna = sum(is.na(forfa)) # 16 nas in the df
# N.B. NAs have been introduced by z-scoring, due to dividing by zero (no variance)
# Since the true mean was zero, it is fair to impute with zero 
forfa$mean_enjoy[is.na(forfa$mean_enjoy)] <- 0
forfa$mean_mood_cong[is.na(forfa$mean_mood_cong)] <- 0
forfa$mean_moved[is.na(forfa$mean_moved)] <- 0
forfa$mean_interesting[is.na(forfa$mean_interesting)] <- 0

# Do tests to determine suitability for FA
print("KMO")
KMO(r=cor(forfa))
print("Bartlett")
cortest.bartlett(forfa)
print("Determinant")
det(cor(forfa))
```
Kaiser’s (1974) guidelines, suggest cutoff of KMO ≥ 60 for determining the factorability of the sample data. Here was are well above, suggesting suitability for FA. 
P value for Bartlett's test is significant. Determinant is positive, indicating FA will likely run.

## Determine number of factors
```{r}
fafitfree <- fa(forfa, nfactors = ncol(forfa), rotate = "none")
n_factors <- length(fafitfree$e.values)
scree     <- data.frame(
  Factor_n =  as.factor(1:n_factors), 
  Eigenvalue = fafitfree$e.values)
ggplot(scree, aes(x = Factor_n, y = Eigenvalue, group = 1)) + 
  geom_point() + geom_line() +
  xlab("Number of factors") +
  ylab("Initial eigenvalue") +
  labs( title = "Scree Plot", 
        subtitle = "(Based on the unreduced correlation matrix)")
```


```{r}
fa.none <- fa(r=forfa, 
              nfactors = 1, 
              fm="pa", # type of factor analysis we want to use (“pa” is principal axis factoring)
              max.iter=100,
              rotate="varimax") # none rotation
print(fa.none)
```

## Concatenate scores with other data
```{r}
z_means['factor_score'] <- fa.none$scores
```

## Run same anova analyses as above but on factor score
```{r}
p = plotRidges(z_means, "condition", "factor_score", "", "Condition", "Mean Factor Score")
runaov("exp_subject_id","factor_score", z_means, "condition") 
factorPlot = p + stat_compare_means(comparisons = my_comparisons, label = "p.signif", method = "t.test", paired="True", na.rm=TRUE)
factorPlot = factorPlot + labs(y = "Mean Factor Score (Aesthetic Experience)")
factorPlot
```
It basically mimics enjoyment results, as that item has highest factor loading. But there was clearly something more interesting going on with all of the small differences in the ANOVAs reported above


```{r}
ratings = z_means[ , -which(names(z_means) %in% c("exp_subject_id","condition", "mean_corresp"))] # drop label columns and ones not using
ratings$mean_timespent = log(ratings$mean_timespent)

par(mfrow = c(3,1))
chart.Correlation(ratings, histogram=TRUE, pch=25, text.panel=labs)
fa.diagram(fa.none, digits=2, cut = .3, simple=TRUE)
#factorPlot
```


# Exploratory analysis

### Relationship between personal variables and time spent / feeling moved (all conditions)
```{r}
# merge mean table and participant level table
df = pall

# plot relation between area and feeling moved
p <- ggplot(df, aes(x=area_composite, y=mean_moved)) +
  geom_point() + geom_smooth(method=lm , color="black", fill="gray", se=TRUE) +
  theme(legend.position="none") + theme_classic()

# Custom marginal plots:
p2 <- ggMarginal(p, type="densigram", fill = "gray") 
p2 


# plot relation between area and timespent
p <- ggplot(df, aes(x=area_composite, y=mean_timespent)) +
  geom_point() + geom_smooth(method=lm , color="black", fill="gray", se=TRUE) +
  theme(legend.position="none") + theme_classic()

# Custom marginal plots:
p2 <- ggMarginal(p, type="densigram", fill = "gray") 
p2 

```
I would have expected a relationship between AReA and some of the aesthetic measures. 
Perhaps AReA more relevant for visual modality. Break out by modality? 

### Visual-only data
```{r}
# merge mean table and participant level table
# pvis = pall[, pall$condition == 'Visual']
# df = pvis


pvis <- pall %>%
  filter(condition == "Visual")

df = pvis

# plot relation between area and feeling moved
p <- ggplot(df, aes(x=area_composite, y=mean_moved)) +
  geom_point() + geom_smooth(method=lm , color="black", fill="gray", se=TRUE) +
  theme(legend.position="none") + theme_classic()

# Custom marginal plots:
p2 <- ggMarginal(p, type="densigram", fill = "gray") 
p2 


# plot relation between area and timespent
p <- ggplot(df, aes(x=area_composite, y=mean_timespent)) +
  geom_point() + geom_smooth(method=lm , color="black", fill="gray", se=TRUE) +
  theme(legend.position="none") + theme_classic()

# Custom marginal plots:
p2 <- ggMarginal(p, type="densigram", fill = "gray") 
p2 

```







