|
965 | 965 | "pd.crosstab(data[\"Embarked\"],data[\"Sex\"],margins=True)"
|
966 | 966 | ]
|
967 | 967 | },
|
| 968 | + { |
| 969 | + "cell_type": "code", |
| 970 | + "execution_count": 49, |
| 971 | + "metadata": {}, |
| 972 | + "outputs": [ |
| 973 | + { |
| 974 | + "name": "stdout", |
| 975 | + "output_type": "stream", |
| 976 | + "text": [ |
| 977 | + "<class 'pandas.core.frame.DataFrame'>\n", |
| 978 | + "RangeIndex: 891 entries, 0 to 890\n", |
| 979 | + "Data columns (total 12 columns):\n", |
| 980 | + "PassengerId 891 non-null int64\n", |
| 981 | + "Survived 891 non-null int64\n", |
| 982 | + "Pclass 891 non-null int64\n", |
| 983 | + "Name 891 non-null object\n", |
| 984 | + "Sex 891 non-null object\n", |
| 985 | + "Age 714 non-null float64\n", |
| 986 | + "SibSp 891 non-null int64\n", |
| 987 | + "Parch 891 non-null int64\n", |
| 988 | + "Ticket 891 non-null object\n", |
| 989 | + "Fare 891 non-null float64\n", |
| 990 | + "Cabin 204 non-null object\n", |
| 991 | + "Embarked 889 non-null object\n", |
| 992 | + "dtypes: float64(2), int64(5), object(5)\n", |
| 993 | + "memory usage: 83.6+ KB\n" |
| 994 | + ] |
| 995 | + } |
| 996 | + ], |
| 997 | + "source": [ |
| 998 | + "data.info()" |
| 999 | + ] |
| 1000 | + }, |
| 1001 | + { |
| 1002 | + "cell_type": "code", |
| 1003 | + "execution_count": 50, |
| 1004 | + "metadata": {}, |
| 1005 | + "outputs": [ |
| 1006 | + { |
| 1007 | + "data": { |
| 1008 | + "text/plain": [ |
| 1009 | + "12" |
| 1010 | + ] |
| 1011 | + }, |
| 1012 | + "execution_count": 50, |
| 1013 | + "metadata": {}, |
| 1014 | + "output_type": "execute_result" |
| 1015 | + } |
| 1016 | + ], |
| 1017 | + "source": [ |
| 1018 | + "data.shape[1] # gives number of columns" |
| 1019 | + ] |
| 1020 | + }, |
| 1021 | + { |
| 1022 | + "cell_type": "markdown", |
| 1023 | + "metadata": {}, |
| 1024 | + "source": [ |
| 1025 | + "# Who was the oldest person?" |
| 1026 | + ] |
| 1027 | + }, |
| 1028 | + { |
| 1029 | + "cell_type": "code", |
| 1030 | + "execution_count": 62, |
| 1031 | + "metadata": {}, |
| 1032 | + "outputs": [ |
| 1033 | + { |
| 1034 | + "data": { |
| 1035 | + "text/html": [ |
| 1036 | + "<div>\n", |
| 1037 | + "<style scoped>\n", |
| 1038 | + " .dataframe tbody tr th:only-of-type {\n", |
| 1039 | + " vertical-align: middle;\n", |
| 1040 | + " }\n", |
| 1041 | + "\n", |
| 1042 | + " .dataframe tbody tr th {\n", |
| 1043 | + " vertical-align: top;\n", |
| 1044 | + " }\n", |
| 1045 | + "\n", |
| 1046 | + " .dataframe thead th {\n", |
| 1047 | + " text-align: right;\n", |
| 1048 | + " }\n", |
| 1049 | + "</style>\n", |
| 1050 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 1051 | + " <thead>\n", |
| 1052 | + " <tr style=\"text-align: right;\">\n", |
| 1053 | + " <th></th>\n", |
| 1054 | + " <th>PassengerId</th>\n", |
| 1055 | + " <th>Survived</th>\n", |
| 1056 | + " <th>Pclass</th>\n", |
| 1057 | + " <th>Age</th>\n", |
| 1058 | + " <th>SibSp</th>\n", |
| 1059 | + " <th>Parch</th>\n", |
| 1060 | + " <th>Fare</th>\n", |
| 1061 | + " </tr>\n", |
| 1062 | + " <tr>\n", |
| 1063 | + " <th>Name</th>\n", |
| 1064 | + " <th></th>\n", |
| 1065 | + " <th></th>\n", |
| 1066 | + " <th></th>\n", |
| 1067 | + " <th></th>\n", |
| 1068 | + " <th></th>\n", |
| 1069 | + " <th></th>\n", |
| 1070 | + " <th></th>\n", |
| 1071 | + " </tr>\n", |
| 1072 | + " </thead>\n", |
| 1073 | + " <tbody>\n", |
| 1074 | + " <tr>\n", |
| 1075 | + " <th>Barkworth, Mr. Algernon Henry Wilson</th>\n", |
| 1076 | + " <td>631</td>\n", |
| 1077 | + " <td>1</td>\n", |
| 1078 | + " <td>1</td>\n", |
| 1079 | + " <td>80.0</td>\n", |
| 1080 | + " <td>0</td>\n", |
| 1081 | + " <td>0</td>\n", |
| 1082 | + " <td>30.0</td>\n", |
| 1083 | + " </tr>\n", |
| 1084 | + " </tbody>\n", |
| 1085 | + "</table>\n", |
| 1086 | + "</div>" |
| 1087 | + ], |
| 1088 | + "text/plain": [ |
| 1089 | + " PassengerId Survived Pclass Age \\\n", |
| 1090 | + "Name \n", |
| 1091 | + "Barkworth, Mr. Algernon Henry Wilson 631 1 1 80.0 \n", |
| 1092 | + "\n", |
| 1093 | + " SibSp Parch Fare \n", |
| 1094 | + "Name \n", |
| 1095 | + "Barkworth, Mr. Algernon Henry Wilson 0 0 30.0 " |
| 1096 | + ] |
| 1097 | + }, |
| 1098 | + "execution_count": 62, |
| 1099 | + "metadata": {}, |
| 1100 | + "output_type": "execute_result" |
| 1101 | + } |
| 1102 | + ], |
| 1103 | + "source": [ |
| 1104 | + "c = data.groupby('Name')\n", |
| 1105 | + "c = c.sum()\n", |
| 1106 | + "c = c.sort_values(['Age'], ascending=False)\n", |
| 1107 | + "c.head(1)" |
| 1108 | + ] |
| 1109 | + }, |
| 1110 | + { |
| 1111 | + "cell_type": "markdown", |
| 1112 | + "metadata": {}, |
| 1113 | + "source": [ |
| 1114 | + "# List the five oldest people" |
| 1115 | + ] |
| 1116 | + }, |
| 1117 | + { |
| 1118 | + "cell_type": "code", |
| 1119 | + "execution_count": 63, |
| 1120 | + "metadata": {}, |
| 1121 | + "outputs": [ |
| 1122 | + { |
| 1123 | + "data": { |
| 1124 | + "text/html": [ |
| 1125 | + "<div>\n", |
| 1126 | + "<style scoped>\n", |
| 1127 | + " .dataframe tbody tr th:only-of-type {\n", |
| 1128 | + " vertical-align: middle;\n", |
| 1129 | + " }\n", |
| 1130 | + "\n", |
| 1131 | + " .dataframe tbody tr th {\n", |
| 1132 | + " vertical-align: top;\n", |
| 1133 | + " }\n", |
| 1134 | + "\n", |
| 1135 | + " .dataframe thead th {\n", |
| 1136 | + " text-align: right;\n", |
| 1137 | + " }\n", |
| 1138 | + "</style>\n", |
| 1139 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 1140 | + " <thead>\n", |
| 1141 | + " <tr style=\"text-align: right;\">\n", |
| 1142 | + " <th></th>\n", |
| 1143 | + " <th>PassengerId</th>\n", |
| 1144 | + " <th>Survived</th>\n", |
| 1145 | + " <th>Pclass</th>\n", |
| 1146 | + " <th>Age</th>\n", |
| 1147 | + " <th>SibSp</th>\n", |
| 1148 | + " <th>Parch</th>\n", |
| 1149 | + " <th>Fare</th>\n", |
| 1150 | + " </tr>\n", |
| 1151 | + " <tr>\n", |
| 1152 | + " <th>Name</th>\n", |
| 1153 | + " <th></th>\n", |
| 1154 | + " <th></th>\n", |
| 1155 | + " <th></th>\n", |
| 1156 | + " <th></th>\n", |
| 1157 | + " <th></th>\n", |
| 1158 | + " <th></th>\n", |
| 1159 | + " <th></th>\n", |
| 1160 | + " </tr>\n", |
| 1161 | + " </thead>\n", |
| 1162 | + " <tbody>\n", |
| 1163 | + " <tr>\n", |
| 1164 | + " <th>Barkworth, Mr. Algernon Henry Wilson</th>\n", |
| 1165 | + " <td>631</td>\n", |
| 1166 | + " <td>1</td>\n", |
| 1167 | + " <td>1</td>\n", |
| 1168 | + " <td>80.0</td>\n", |
| 1169 | + " <td>0</td>\n", |
| 1170 | + " <td>0</td>\n", |
| 1171 | + " <td>30.0000</td>\n", |
| 1172 | + " </tr>\n", |
| 1173 | + " <tr>\n", |
| 1174 | + " <th>Svensson, Mr. Johan</th>\n", |
| 1175 | + " <td>852</td>\n", |
| 1176 | + " <td>0</td>\n", |
| 1177 | + " <td>3</td>\n", |
| 1178 | + " <td>74.0</td>\n", |
| 1179 | + " <td>0</td>\n", |
| 1180 | + " <td>0</td>\n", |
| 1181 | + " <td>7.7750</td>\n", |
| 1182 | + " </tr>\n", |
| 1183 | + " <tr>\n", |
| 1184 | + " <th>Goldschmidt, Mr. George B</th>\n", |
| 1185 | + " <td>97</td>\n", |
| 1186 | + " <td>0</td>\n", |
| 1187 | + " <td>1</td>\n", |
| 1188 | + " <td>71.0</td>\n", |
| 1189 | + " <td>0</td>\n", |
| 1190 | + " <td>0</td>\n", |
| 1191 | + " <td>34.6542</td>\n", |
| 1192 | + " </tr>\n", |
| 1193 | + " <tr>\n", |
| 1194 | + " <th>Artagaveytia, Mr. Ramon</th>\n", |
| 1195 | + " <td>494</td>\n", |
| 1196 | + " <td>0</td>\n", |
| 1197 | + " <td>1</td>\n", |
| 1198 | + " <td>71.0</td>\n", |
| 1199 | + " <td>0</td>\n", |
| 1200 | + " <td>0</td>\n", |
| 1201 | + " <td>49.5042</td>\n", |
| 1202 | + " </tr>\n", |
| 1203 | + " <tr>\n", |
| 1204 | + " <th>Connors, Mr. Patrick</th>\n", |
| 1205 | + " <td>117</td>\n", |
| 1206 | + " <td>0</td>\n", |
| 1207 | + " <td>3</td>\n", |
| 1208 | + " <td>70.5</td>\n", |
| 1209 | + " <td>0</td>\n", |
| 1210 | + " <td>0</td>\n", |
| 1211 | + " <td>7.7500</td>\n", |
| 1212 | + " </tr>\n", |
| 1213 | + " </tbody>\n", |
| 1214 | + "</table>\n", |
| 1215 | + "</div>" |
| 1216 | + ], |
| 1217 | + "text/plain": [ |
| 1218 | + " PassengerId Survived Pclass Age \\\n", |
| 1219 | + "Name \n", |
| 1220 | + "Barkworth, Mr. Algernon Henry Wilson 631 1 1 80.0 \n", |
| 1221 | + "Svensson, Mr. Johan 852 0 3 74.0 \n", |
| 1222 | + "Goldschmidt, Mr. George B 97 0 1 71.0 \n", |
| 1223 | + "Artagaveytia, Mr. Ramon 494 0 1 71.0 \n", |
| 1224 | + "Connors, Mr. Patrick 117 0 3 70.5 \n", |
| 1225 | + "\n", |
| 1226 | + " SibSp Parch Fare \n", |
| 1227 | + "Name \n", |
| 1228 | + "Barkworth, Mr. Algernon Henry Wilson 0 0 30.0000 \n", |
| 1229 | + "Svensson, Mr. Johan 0 0 7.7750 \n", |
| 1230 | + "Goldschmidt, Mr. George B 0 0 34.6542 \n", |
| 1231 | + "Artagaveytia, Mr. Ramon 0 0 49.5042 \n", |
| 1232 | + "Connors, Mr. Patrick 0 0 7.7500 " |
| 1233 | + ] |
| 1234 | + }, |
| 1235 | + "execution_count": 63, |
| 1236 | + "metadata": {}, |
| 1237 | + "output_type": "execute_result" |
| 1238 | + } |
| 1239 | + ], |
| 1240 | + "source": [ |
| 1241 | + "c.head(5)" |
| 1242 | + ] |
| 1243 | + }, |
| 1244 | + { |
| 1245 | + "cell_type": "markdown", |
| 1246 | + "metadata": {}, |
| 1247 | + "source": [ |
| 1248 | + "# Multi Index Tables" |
| 1249 | + ] |
| 1250 | + }, |
| 1251 | + { |
| 1252 | + "cell_type": "code", |
| 1253 | + "execution_count": 68, |
| 1254 | + "metadata": {}, |
| 1255 | + "outputs": [ |
| 1256 | + { |
| 1257 | + "data": { |
| 1258 | + "text/html": [ |
| 1259 | + "<div>\n", |
| 1260 | + "<style scoped>\n", |
| 1261 | + " .dataframe tbody tr th:only-of-type {\n", |
| 1262 | + " vertical-align: middle;\n", |
| 1263 | + " }\n", |
| 1264 | + "\n", |
| 1265 | + " .dataframe tbody tr th {\n", |
| 1266 | + " vertical-align: top;\n", |
| 1267 | + " }\n", |
| 1268 | + "\n", |
| 1269 | + " .dataframe thead th {\n", |
| 1270 | + " text-align: right;\n", |
| 1271 | + " }\n", |
| 1272 | + "</style>\n", |
| 1273 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 1274 | + " <thead>\n", |
| 1275 | + " <tr style=\"text-align: right;\">\n", |
| 1276 | + " <th></th>\n", |
| 1277 | + " <th></th>\n", |
| 1278 | + " <th>a</th>\n", |
| 1279 | + " <th>b</th>\n", |
| 1280 | + " <th>c</th>\n", |
| 1281 | + " </tr>\n", |
| 1282 | + " <tr>\n", |
| 1283 | + " <th>n</th>\n", |
| 1284 | + " <th>v</th>\n", |
| 1285 | + " <th></th>\n", |
| 1286 | + " <th></th>\n", |
| 1287 | + " <th></th>\n", |
| 1288 | + " </tr>\n", |
| 1289 | + " </thead>\n", |
| 1290 | + " <tbody>\n", |
| 1291 | + " <tr>\n", |
| 1292 | + " <th>d</th>\n", |
| 1293 | + " <th>1</th>\n", |
| 1294 | + " <td>4</td>\n", |
| 1295 | + " <td>7</td>\n", |
| 1296 | + " <td>10</td>\n", |
| 1297 | + " </tr>\n", |
| 1298 | + " <tr>\n", |
| 1299 | + " <th>f</th>\n", |
| 1300 | + " <th>2</th>\n", |
| 1301 | + " <td>5</td>\n", |
| 1302 | + " <td>8</td>\n", |
| 1303 | + " <td>11</td>\n", |
| 1304 | + " </tr>\n", |
| 1305 | + " <tr>\n", |
| 1306 | + " <th>e</th>\n", |
| 1307 | + " <th>2</th>\n", |
| 1308 | + " <td>6</td>\n", |
| 1309 | + " <td>9</td>\n", |
| 1310 | + " <td>12</td>\n", |
| 1311 | + " </tr>\n", |
| 1312 | + " </tbody>\n", |
| 1313 | + "</table>\n", |
| 1314 | + "</div>" |
| 1315 | + ], |
| 1316 | + "text/plain": [ |
| 1317 | + " a b c\n", |
| 1318 | + "n v \n", |
| 1319 | + "d 1 4 7 10\n", |
| 1320 | + "f 2 5 8 11\n", |
| 1321 | + "e 2 6 9 12" |
| 1322 | + ] |
| 1323 | + }, |
| 1324 | + "execution_count": 68, |
| 1325 | + "metadata": {}, |
| 1326 | + "output_type": "execute_result" |
| 1327 | + } |
| 1328 | + ], |
| 1329 | + "source": [ |
| 1330 | + "df = pd.DataFrame({\"a\" : [4 ,5, 6],\"b\" : [7, 8, 9],\"c\" : [10, 11, 12]}, index = pd.MultiIndex.from_tuples([('d',1),('f',2),('e',2)],\n", |
| 1331 | + "names=['n','v']))\n", |
| 1332 | + "df" |
| 1333 | + ] |
| 1334 | + }, |
968 | 1335 | {
|
969 | 1336 | "cell_type": "code",
|
970 | 1337 | "execution_count": null,
|
|
0 commit comments