Skip to article frontmatterSkip to article content

Data preprocessing for pyscene data

Import libraries

import numpy as np
import pandas as pd
import seaborn as sns
import friends_pack

Load all data into a pandas DataFrame



 # Build the scenes table with the project helper, giving it the annotation
 # directory and the "-Scenes.tsv" filename suffix.
 # NOTE(review): presumably the helper gathers every file in that directory
 # ending with the suffix into one frame -- confirm against friends_pack.
 df_scenes = friends_pack.tsv_to_df(
     "../friends_annotations/annotation_results/TSVpyscene",
     '-Scenes.tsv',
 )
 

Add the columns that you need for analyses

# Per-episode scene counter: cumcount() is 0-based within each 'episode'
# group, so shift it by one to start numbering at 1.
df_scenes['scene_number'] = df_scenes.groupby('episode').cumcount().add(1)

# Running scene counter over the whole table (1..N), following the current
# row order of the frame.
df_scenes['global_scene_number'] = range(1, 1 + len(df_scenes))

# Bare expression so the notebook renders the updated frame.
df_scenes
Loading...
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns, printed as plain text.
summary_stats = df_scenes.describe()
print(summary_stats)
              onset      duration   onset_frame  scene_number  \
count  47659.000000  47659.000000  47659.000000  47659.000000   
mean     339.900579      4.188695  10177.691727     85.960574   
std      205.274240      3.620343   6145.618798     51.041465   
min        0.000000      0.100000      1.000000      1.000000   
25%      159.668000      1.902000   4782.000000     42.000000   
50%      334.051000      3.137000  10004.000000     84.000000   
75%      514.627500      5.239000  15414.000000    127.000000   
max      874.486000    138.879000  26208.000000    240.000000   

       global_scene_number  
count         47659.000000  
mean          23830.000000  
std           13758.112576  
min               1.000000  
25%           11915.500000  
50%           23830.000000  
75%           35744.500000  
max           47659.000000  

Save your DataFrame to a TSV file

# Write the scenes table as tab-separated text; the index is left out of
# the file.
df_scenes.to_csv(
    "../output/scenes_data.tsv",
    sep="\t",
    index=False,
)