-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathis_charset_bad.py
28 lines (24 loc) · 919 Bytes
/
is_charset_bad.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# -*- coding: utf-8 -*-
"""
Created on Fri Aug 11 11:01:57 2017
@author: Suha nasser
"""
import pandas as pd
import glob
import os
import numpy as np
def bad_data(tablebinom, alpha=0.05):
    """
    Flag character sets that reject the null hypothesis of symmetry.

    Reads the CSV at ``tablebinom``, adds two columns, and overwrites the
    same file in place:

    * ``dataset`` -- name of the directory two levels above the file
      (for ``<root>/<dataset>/Data/tablebinom.csv`` this is ``<dataset>``).
    * ``isbad``   -- 1 where ``p_binomial`` is strictly below ``alpha``,
      otherwise 0.

    Parameters
    ----------
    tablebinom : str
        Path to a CSV file that contains a ``p_binomial`` column.
    alpha : float, optional
        Significance threshold; defaults to 0.05.

    Returns
    -------
    None
        The result is written back to ``tablebinom``.
    """
    csvin = pd.read_csv(tablebinom)

    # The dataset name is the grandparent directory of the CSV file.
    dataset_dir = os.path.dirname(os.path.dirname(tablebinom))
    csvin['dataset'] = os.path.basename(dataset_dir)

    # A p-binomial below the threshold means the character set "is bad":
    # it rejects the null hypothesis of symmetry. Missing p-values compare
    # as False and are therefore flagged 0.
    csvin['isbad'] = np.where(csvin.p_binomial < alpha, 1, 0)

    csvin.to_csv(tablebinom, index=False)
    return
# Annotate every per-dataset tablebinom.csv in place, then gather all of them
# into one combined table.
frames = []
# glob returns matches in arbitrary order; sort so the combined CSV has a
# reproducible row order from run to run.
for f in sorted(glob.glob('/data/srh/processed_data/SRH_tables/*/Data/tablebinom.csv')):
    bad_data(f)  # rewrites f with the extra 'dataset' and 'isbad' columns
    frames.append(pd.read_csv(f))

# DataFrame.append was removed in pandas 2.0 and copied the whole accumulated
# frame on every call; concatenate once instead. pd.concat raises on an empty
# list, so fall back to an empty frame when no files matched.
all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
all_data.to_csv('/data/srh/tables/is_charset_bad.csv', index=False)