import pandas as pd
dfp = pd.read_excel(r'Absenteeism_at_work.xls')
dfp.head(5)
ID | Reason_for_absence | Month_of_absence | Day_of_the_week | Seasons | Transportation_expense | Distance_from_Residence_to_Work | Service_time | Age | Work load Average/day | ... | Disciplinary failure | Education | Son | Social_drinker | Social_smoker | Pet | Weight | Height | Body_mass_index | Absenteeism_time_in_hours | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 11 | 26 | 7 | 3 | 1 | 289 | 36 | 13 | 33 | 239554 | ... | 0 | 1 | 2 | 1 | 0 | 1 | 90 | 172 | 30 | 4 |
1 | 36 | 0 | 7 | 3 | 1 | 118 | 13 | 18 | 50 | 239554 | ... | 1 | 1 | 1 | 1 | 0 | 0 | 98 | 178 | 31 | 0 |
2 | 3 | 23 | 7 | 4 | 1 | 179 | 51 | 18 | 38 | 239554 | ... | 0 | 1 | 0 | 1 | 0 | 0 | 89 | 170 | 31 | 2 |
3 | 7 | 7 | 7 | 5 | 1 | 279 | 5 | 14 | 39 | 239554 | ... | 0 | 1 | 2 | 1 | 1 | 0 | 68 | 168 | 24 | 4 |
4 | 11 | 23 | 7 | 5 | 1 | 289 | 36 | 13 | 33 | 239554 | ... | 0 | 1 | 2 | 1 | 0 | 1 | 90 | 172 | 30 | 2 |
5 rows × 21 columns
dfJson = pd.read_json(r'test.json')
dfJson.head(5)
Names | Age | |
---|---|---|
0 | John | 33 |
1 | Sal | 45 |
2 | Tim | 22 |
3 | Rod | 54 |
dfURL = pd.read_csv(r'https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data', names =['Sex',
'Length','Diameter', 'Height','Whole weight', 'Shucked weight','Viscera weight', 'Shell weight', 'Rings'])
dfURL.head(5)
Sex | Length | Diameter | Height | Whole weight | Shucked weight | Viscera weight | Shell weight | Rings | |
---|---|---|---|---|---|---|---|---|---|
0 | M | 0.455 | 0.365 | 0.095 | 0.5140 | 0.2245 | 0.1010 | 0.150 | 15 |
1 | M | 0.350 | 0.265 | 0.090 | 0.2255 | 0.0995 | 0.0485 | 0.070 | 7 |
2 | F | 0.530 | 0.420 | 0.135 | 0.6770 | 0.2565 | 0.1415 | 0.210 | 9 |
3 | M | 0.440 | 0.365 | 0.125 | 0.5160 | 0.2155 | 0.1140 | 0.155 | 10 |
4 | I | 0.330 | 0.255 | 0.080 | 0.2050 | 0.0895 | 0.0395 | 0.055 | 7 |
from pandasql import sqldf
dfpsql = pd.read_excel(r'Absenteeism_at_work.xls')
Query_string = """ select * from dfpsql limit 5 """
sqldf(Query_string, globals())
ID | Reason_for_absence | Month_of_absence | Day_of_the_week | Seasons | Transportation_expense | Distance_from_Residence_to_Work | Service_time | Age | Work load Average/day | ... | Disciplinary failure | Education | Son | Social_drinker | Social_smoker | Pet | Weight | Height | Body_mass_index | Absenteeism_time_in_hours | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 11 | 26 | 7 | 3 | 1 | 289 | 36 | 13 | 33 | 239554 | ... | 0 | 1 | 2 | 1 | 0 | 1 | 90 | 172 | 30 | 4 |
1 | 36 | 0 | 7 | 3 | 1 | 118 | 13 | 18 | 50 | 239554 | ... | 1 | 1 | 1 | 1 | 0 | 0 | 98 | 178 | 31 | 0 |
2 | 3 | 23 | 7 | 4 | 1 | 179 | 51 | 18 | 38 | 239554 | ... | 0 | 1 | 0 | 1 | 0 | 0 | 89 | 170 | 31 | 2 |
3 | 7 | 7 | 7 | 5 | 1 | 279 | 5 | 14 | 39 | 239554 | ... | 0 | 1 | 2 | 1 | 1 | 0 | 68 | 168 | 24 | 4 |
4 | 11 | 23 | 7 | 5 | 1 | 289 | 36 | 13 | 33 | 239554 | ... | 0 | 1 | 2 | 1 | 0 | 1 | 90 | 172 | 30 | 2 |
5 rows × 21 columns
dfp[(dfp['Age'] >=30) & (dfp['Age'] <=45)]
ID | Reason_for_absence | Month_of_absence | Day_of_the_week | Seasons | Transportation_expense | Distance_from_Residence_to_Work | Service_time | Age | Work load Average/day | ... | Disciplinary failure | Education | Son | Social_drinker | Social_smoker | Pet | Weight | Height | Body_mass_index | Absenteeism_time_in_hours | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 11 | 26 | 7 | 3 | 1 | 289 | 36 | 13 | 33 | 239554 | ... | 0 | 1 | 2 | 1 | 0 | 1 | 90 | 172 | 30 | 4 |
2 | 3 | 23 | 7 | 4 | 1 | 179 | 51 | 18 | 38 | 239554 | ... | 0 | 1 | 0 | 1 | 0 | 0 | 89 | 170 | 31 | 2 |
3 | 7 | 7 | 7 | 5 | 1 | 279 | 5 | 14 | 39 | 239554 | ... | 0 | 1 | 2 | 1 | 1 | 0 | 68 | 168 | 24 | 4 |
4 | 11 | 23 | 7 | 5 | 1 | 289 | 36 | 13 | 33 | 239554 | ... | 0 | 1 | 2 | 1 | 0 | 1 | 90 | 172 | 30 | 2 |
5 | 3 | 23 | 7 | 6 | 1 | 179 | 51 | 18 | 38 | 239554 | ... | 0 | 1 | 0 | 1 | 0 | 0 | 89 | 170 | 31 | 2 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
734 | 13 | 13 | 7 | 2 | 1 | 369 | 17 | 12 | 31 | 264604 | ... | 0 | 1 | 3 | 1 | 0 | 0 | 70 | 169 | 25 | 80 |
735 | 11 | 14 | 7 | 3 | 1 | 289 | 36 | 13 | 33 | 264604 | ... | 0 | 1 | 2 | 1 | 0 | 1 | 90 | 172 | 30 | 8 |
736 | 1 | 11 | 7 | 3 | 1 | 235 | 11 | 14 | 37 | 264604 | ... | 0 | 3 | 1 | 0 | 0 | 1 | 88 | 172 | 29 | 4 |
737 | 4 | 0 | 0 | 3 | 1 | 118 | 14 | 13 | 40 | 271219 | ... | 0 | 1 | 1 | 1 | 0 | 8 | 98 | 170 | 34 | 0 |
738 | 8 | 0 | 0 | 4 | 2 | 231 | 35 | 14 | 39 | 271219 | ... | 0 | 1 | 2 | 1 | 0 | 2 | 100 | 170 | 35 | 0 |
526 rows × 21 columns
Query_string = """ select * from dfpsql where age>=30 and age<=45 """
sqldf(Query_string, globals())
ID | Reason_for_absence | Month_of_absence | Day_of_the_week | Seasons | Transportation_expense | Distance_from_Residence_to_Work | Service_time | Age | Work load Average/day | ... | Disciplinary failure | Education | Son | Social_drinker | Social_smoker | Pet | Weight | Height | Body_mass_index | Absenteeism_time_in_hours | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 11 | 26 | 7 | 3 | 1 | 289 | 36 | 13 | 33 | 239554 | ... | 0 | 1 | 2 | 1 | 0 | 1 | 90 | 172 | 30 | 4 |
1 | 3 | 23 | 7 | 4 | 1 | 179 | 51 | 18 | 38 | 239554 | ... | 0 | 1 | 0 | 1 | 0 | 0 | 89 | 170 | 31 | 2 |
2 | 7 | 7 | 7 | 5 | 1 | 279 | 5 | 14 | 39 | 239554 | ... | 0 | 1 | 2 | 1 | 1 | 0 | 68 | 168 | 24 | 4 |
3 | 11 | 23 | 7 | 5 | 1 | 289 | 36 | 13 | 33 | 239554 | ... | 0 | 1 | 2 | 1 | 0 | 1 | 90 | 172 | 30 | 2 |
4 | 3 | 23 | 7 | 6 | 1 | 179 | 51 | 18 | 38 | 239554 | ... | 0 | 1 | 0 | 1 | 0 | 0 | 89 | 170 | 31 | 2 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
521 | 13 | 13 | 7 | 2 | 1 | 369 | 17 | 12 | 31 | 264604 | ... | 0 | 1 | 3 | 1 | 0 | 0 | 70 | 169 | 25 | 80 |
522 | 11 | 14 | 7 | 3 | 1 | 289 | 36 | 13 | 33 | 264604 | ... | 0 | 1 | 2 | 1 | 0 | 1 | 90 | 172 | 30 | 8 |
523 | 1 | 11 | 7 | 3 | 1 | 235 | 11 | 14 | 37 | 264604 | ... | 0 | 3 | 1 | 0 | 0 | 1 | 88 | 172 | 29 | 4 |
524 | 4 | 0 | 0 | 3 | 1 | 118 | 14 | 13 | 40 | 271219 | ... | 0 | 1 | 1 | 1 | 0 | 8 | 98 | 170 | 34 | 0 |
525 | 8 | 0 | 0 | 4 | 2 | 231 | 35 | 14 | 39 | 271219 | ... | 0 | 1 | 2 | 1 | 0 | 2 | 100 | 170 | 35 | 0 |
526 rows × 21 columns
dfp['ID'].unique()
array([11, 36, 3, 7, 10, 20, 14, 1, 24, 6, 33, 18, 30, 2, 19, 27, 34, 5, 15, 29, 28, 13, 22, 17, 31, 23, 32, 9, 26, 21, 8, 25, 12, 16, 4, 35], dtype=int64)
Query_string = """ select distinct ID from dfpsql;"""
sqldf(Query_string, globals())
ID | |
---|---|
0 | 11 |
1 | 36 |
2 | 3 |
3 | 7 |
4 | 10 |
5 | 20 |
6 | 14 |
7 | 1 |
8 | 24 |
9 | 6 |
10 | 33 |
11 | 18 |
12 | 30 |
13 | 2 |
14 | 19 |
15 | 27 |
16 | 34 |
17 | 5 |
18 | 15 |
19 | 29 |
20 | 28 |
21 | 13 |
22 | 22 |
23 | 17 |
24 | 31 |
25 | 23 |
26 | 32 |
27 | 9 |
28 | 26 |
29 | 21 |
30 | 8 |
31 | 25 |
32 | 12 |
33 | 16 |
34 | 4 |
35 | 35 |
dfp[dfp.Age.isin([20,30,40])]
ID | Reason_for_absence | Month_of_absence | Day_of_the_week | Seasons | Transportation_expense | Distance_from_Residence_to_Work | Service_time | Age | Work load Average/day | ... | Disciplinary failure | Education | Son | Social_drinker | Social_smoker | Pet | Weight | Height | Body_mass_index | Absenteeism_time_in_hours | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
47 | 15 | 23 | 9 | 5 | 1 | 291 | 31 | 12 | 40 | 241476 | ... | 0 | 1 | 1 | 1 | 0 | 1 | 73 | 171 | 25 | 4 |
49 | 15 | 14 | 9 | 2 | 4 | 291 | 31 | 12 | 40 | 241476 | ... | 0 | 1 | 1 | 1 | 0 | 1 | 73 | 171 | 25 | 32 |
65 | 22 | 23 | 10 | 5 | 4 | 179 | 26 | 9 | 30 | 253465 | ... | 0 | 3 | 0 | 0 | 0 | 0 | 56 | 171 | 19 | 1 |
71 | 15 | 23 | 10 | 5 | 4 | 291 | 31 | 12 | 40 | 253465 | ... | 0 | 1 | 1 | 1 | 0 | 1 | 73 | 171 | 25 | 5 |
75 | 15 | 14 | 10 | 3 | 4 | 291 | 31 | 12 | 40 | 253465 | ... | 0 | 1 | 1 | 1 | 0 | 1 | 73 | 171 | 25 | 8 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
713 | 22 | 27 | 6 | 6 | 3 | 179 | 26 | 9 | 30 | 275089 | ... | 0 | 3 | 0 | 0 | 0 | 0 | 56 | 171 | 19 | 2 |
717 | 22 | 13 | 6 | 5 | 3 | 179 | 26 | 9 | 30 | 275089 | ... | 0 | 3 | 0 | 0 | 0 | 0 | 56 | 171 | 19 | 2 |
718 | 15 | 28 | 6 | 5 | 3 | 291 | 31 | 12 | 40 | 275089 | ... | 0 | 1 | 1 | 1 | 0 | 1 | 73 | 171 | 25 | 2 |
719 | 22 | 13 | 6 | 2 | 1 | 179 | 26 | 9 | 30 | 275089 | ... | 0 | 3 | 0 | 0 | 0 | 0 | 56 | 171 | 19 | 3 |
737 | 4 | 0 | 0 | 3 | 1 | 118 | 14 | 13 | 40 | 271219 | ... | 0 | 1 | 1 | 1 | 0 | 8 | 98 | 170 | 34 | 0 |
104 rows × 21 columns
Query_string = """ select * from dfpsql where Age in(20,30,40);"""
sqldf(Query_string, globals())
ID | Reason_for_absence | Month_of_absence | Day_of_the_week | Seasons | Transportation_expense | Distance_from_Residence_to_Work | Service_time | Age | Work load Average/day | ... | Disciplinary failure | Education | Son | Social_drinker | Social_smoker | Pet | Weight | Height | Body_mass_index | Absenteeism_time_in_hours | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 15 | 23 | 9 | 5 | 1 | 291 | 31 | 12 | 40 | 241476 | ... | 0 | 1 | 1 | 1 | 0 | 1 | 73 | 171 | 25 | 4 |
1 | 15 | 14 | 9 | 2 | 4 | 291 | 31 | 12 | 40 | 241476 | ... | 0 | 1 | 1 | 1 | 0 | 1 | 73 | 171 | 25 | 32 |
2 | 22 | 23 | 10 | 5 | 4 | 179 | 26 | 9 | 30 | 253465 | ... | 0 | 3 | 0 | 0 | 0 | 0 | 56 | 171 | 19 | 1 |
3 | 15 | 23 | 10 | 5 | 4 | 291 | 31 | 12 | 40 | 253465 | ... | 0 | 1 | 1 | 1 | 0 | 1 | 73 | 171 | 25 | 5 |
4 | 15 | 14 | 10 | 3 | 4 | 291 | 31 | 12 | 40 | 253465 | ... | 0 | 1 | 1 | 1 | 0 | 1 | 73 | 171 | 25 | 8 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
99 | 22 | 27 | 6 | 6 | 3 | 179 | 26 | 9 | 30 | 275089 | ... | 0 | 3 | 0 | 0 | 0 | 0 | 56 | 171 | 19 | 2 |
100 | 22 | 13 | 6 | 5 | 3 | 179 | 26 | 9 | 30 | 275089 | ... | 0 | 3 | 0 | 0 | 0 | 0 | 56 | 171 | 19 | 2 |
101 | 15 | 28 | 6 | 5 | 3 | 291 | 31 | 12 | 40 | 275089 | ... | 0 | 1 | 1 | 1 | 0 | 1 | 73 | 171 | 25 | 2 |
102 | 22 | 13 | 6 | 2 | 1 | 179 | 26 | 9 | 30 | 275089 | ... | 0 | 3 | 0 | 0 | 0 | 0 | 56 | 171 | 19 | 3 |
103 | 4 | 0 | 0 | 3 | 1 | 118 | 14 | 13 | 40 | 271219 | ... | 0 | 1 | 1 | 1 | 0 | 8 | 98 | 170 | 34 | 0 |
104 rows × 21 columns
dfp[~dfp.Age.isin([20,30,40])]
ID | Reason_for_absence | Month_of_absence | Day_of_the_week | Seasons | Transportation_expense | Distance_from_Residence_to_Work | Service_time | Age | Work load Average/day | ... | Disciplinary failure | Education | Son | Social_drinker | Social_smoker | Pet | Weight | Height | Body_mass_index | Absenteeism_time_in_hours | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 11 | 26 | 7 | 3 | 1 | 289 | 36 | 13 | 33 | 239554 | ... | 0 | 1 | 2 | 1 | 0 | 1 | 90 | 172 | 30 | 4 |
1 | 36 | 0 | 7 | 3 | 1 | 118 | 13 | 18 | 50 | 239554 | ... | 1 | 1 | 1 | 1 | 0 | 0 | 98 | 178 | 31 | 0 |
2 | 3 | 23 | 7 | 4 | 1 | 179 | 51 | 18 | 38 | 239554 | ... | 0 | 1 | 0 | 1 | 0 | 0 | 89 | 170 | 31 | 2 |
3 | 7 | 7 | 7 | 5 | 1 | 279 | 5 | 14 | 39 | 239554 | ... | 0 | 1 | 2 | 1 | 1 | 0 | 68 | 168 | 24 | 4 |
4 | 11 | 23 | 7 | 5 | 1 | 289 | 36 | 13 | 33 | 239554 | ... | 0 | 1 | 2 | 1 | 0 | 1 | 90 | 172 | 30 | 2 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
734 | 13 | 13 | 7 | 2 | 1 | 369 | 17 | 12 | 31 | 264604 | ... | 0 | 1 | 3 | 1 | 0 | 0 | 70 | 169 | 25 | 80 |
735 | 11 | 14 | 7 | 3 | 1 | 289 | 36 | 13 | 33 | 264604 | ... | 0 | 1 | 2 | 1 | 0 | 1 | 90 | 172 | 30 | 8 |
736 | 1 | 11 | 7 | 3 | 1 | 235 | 11 | 14 | 37 | 264604 | ... | 0 | 3 | 1 | 0 | 0 | 1 | 88 | 172 | 29 | 4 |
738 | 8 | 0 | 0 | 4 | 2 | 231 | 35 | 14 | 39 | 271219 | ... | 0 | 1 | 2 | 1 | 0 | 2 | 100 | 170 | 35 | 0 |
739 | 35 | 0 | 0 | 6 | 3 | 179 | 45 | 14 | 53 | 271219 | ... | 0 | 1 | 1 | 0 | 0 | 1 | 77 | 175 | 25 | 0 |
636 rows × 21 columns
Query_string = """ select * from dfpsql where Age not in(20,30,40);"""
sqldf(Query_string, globals())
ID | Reason_for_absence | Month_of_absence | Day_of_the_week | Seasons | Transportation_expense | Distance_from_Residence_to_Work | Service_time | Age | Work load Average/day | ... | Disciplinary failure | Education | Son | Social_drinker | Social_smoker | Pet | Weight | Height | Body_mass_index | Absenteeism_time_in_hours | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 11 | 26 | 7 | 3 | 1 | 289 | 36 | 13 | 33 | 239554 | ... | 0 | 1 | 2 | 1 | 0 | 1 | 90 | 172 | 30 | 4 |
1 | 36 | 0 | 7 | 3 | 1 | 118 | 13 | 18 | 50 | 239554 | ... | 1 | 1 | 1 | 1 | 0 | 0 | 98 | 178 | 31 | 0 |
2 | 3 | 23 | 7 | 4 | 1 | 179 | 51 | 18 | 38 | 239554 | ... | 0 | 1 | 0 | 1 | 0 | 0 | 89 | 170 | 31 | 2 |
3 | 7 | 7 | 7 | 5 | 1 | 279 | 5 | 14 | 39 | 239554 | ... | 0 | 1 | 2 | 1 | 1 | 0 | 68 | 168 | 24 | 4 |
4 | 11 | 23 | 7 | 5 | 1 | 289 | 36 | 13 | 33 | 239554 | ... | 0 | 1 | 2 | 1 | 0 | 1 | 90 | 172 | 30 | 2 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
631 | 13 | 13 | 7 | 2 | 1 | 369 | 17 | 12 | 31 | 264604 | ... | 0 | 1 | 3 | 1 | 0 | 0 | 70 | 169 | 25 | 80 |
632 | 11 | 14 | 7 | 3 | 1 | 289 | 36 | 13 | 33 | 264604 | ... | 0 | 1 | 2 | 1 | 0 | 1 | 90 | 172 | 30 | 8 |
633 | 1 | 11 | 7 | 3 | 1 | 235 | 11 | 14 | 37 | 264604 | ... | 0 | 3 | 1 | 0 | 0 | 1 | 88 | 172 | 29 | 4 |
634 | 8 | 0 | 0 | 4 | 2 | 231 | 35 | 14 | 39 | 271219 | ... | 0 | 1 | 2 | 1 | 0 | 2 | 100 | 170 | 35 | 0 |
635 | 35 | 0 | 0 | 6 | 3 | 179 | 45 | 14 | 53 | 271219 | ... | 0 | 1 | 1 | 0 | 0 | 1 | 77 | 175 | 25 | 0 |
636 rows × 21 columns
dfp.sort_values(by = ['Age','Service_time'], ascending= True)
ID | Reason_for_absence | Month_of_absence | Day_of_the_week | Seasons | Transportation_expense | Distance_from_Residence_to_Work | Service_time | Age | Work load Average/day | ... | Disciplinary failure | Education | Son | Social_drinker | Social_smoker | Pet | Weight | Height | Body_mass_index | Absenteeism_time_in_hours | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
40 | 27 | 23 | 9 | 3 | 1 | 184 | 42 | 7 | 27 | 241476 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 2 |
118 | 27 | 23 | 1 | 5 | 2 | 184 | 42 | 7 | 27 | 308593 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 2 |
132 | 27 | 23 | 1 | 5 | 2 | 184 | 42 | 7 | 27 | 308593 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 2 |
137 | 27 | 23 | 2 | 6 | 2 | 184 | 42 | 7 | 27 | 302585 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 1 |
149 | 27 | 23 | 2 | 3 | 2 | 184 | 42 | 7 | 27 | 302585 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 8 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
620 | 9 | 25 | 3 | 3 | 2 | 228 | 14 | 16 | 58 | 222196 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 3 |
622 | 9 | 12 | 3 | 3 | 2 | 228 | 14 | 16 | 58 | 222196 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 112 |
640 | 9 | 25 | 3 | 4 | 2 | 228 | 14 | 16 | 58 | 222196 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 2 |
727 | 9 | 6 | 7 | 2 | 1 | 228 | 14 | 16 | 58 | 264604 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 8 |
729 | 9 | 6 | 7 | 3 | 1 | 228 | 14 | 16 | 58 | 264604 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 120 |
740 rows × 21 columns
Query_string = """ select * from dfpsql order by Age,Service_time;"""
sqldf(Query_string, globals())
ID | Reason_for_absence | Month_of_absence | Day_of_the_week | Seasons | Transportation_expense | Distance_from_Residence_to_Work | Service_time | Age | Work load Average/day | ... | Disciplinary failure | Education | Son | Social_drinker | Social_smoker | Pet | Weight | Height | Body_mass_index | Absenteeism_time_in_hours | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 27 | 23 | 9 | 3 | 1 | 184 | 42 | 7 | 27 | 241476 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 2 |
1 | 27 | 23 | 1 | 5 | 2 | 184 | 42 | 7 | 27 | 308593 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 2 |
2 | 27 | 23 | 1 | 5 | 2 | 184 | 42 | 7 | 27 | 308593 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 2 |
3 | 27 | 23 | 2 | 6 | 2 | 184 | 42 | 7 | 27 | 302585 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 1 |
4 | 27 | 23 | 2 | 3 | 2 | 184 | 42 | 7 | 27 | 302585 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 8 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
735 | 9 | 25 | 3 | 3 | 2 | 228 | 14 | 16 | 58 | 222196 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 3 |
736 | 9 | 12 | 3 | 3 | 2 | 228 | 14 | 16 | 58 | 222196 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 112 |
737 | 9 | 25 | 3 | 4 | 2 | 228 | 14 | 16 | 58 | 222196 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 2 |
738 | 9 | 6 | 7 | 2 | 1 | 228 | 14 | 16 | 58 | 264604 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 8 |
739 | 9 | 6 | 7 | 3 | 1 | 228 | 14 | 16 | 58 | 264604 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 120 |
740 rows × 21 columns
#Desending
dfp.sort_values(by = ['Age','Service_time'], ascending= False)
ID | Reason_for_absence | Month_of_absence | Day_of_the_week | Seasons | Transportation_expense | Distance_from_Residence_to_Work | Service_time | Age | Work load Average/day | ... | Disciplinary failure | Education | Son | Social_drinker | Social_smoker | Pet | Weight | Height | Body_mass_index | Absenteeism_time_in_hours | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
255 | 9 | 18 | 8 | 3 | 1 | 228 | 14 | 16 | 58 | 265615 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 8 |
434 | 9 | 18 | 5 | 4 | 3 | 228 | 14 | 16 | 58 | 246074 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 8 |
521 | 9 | 1 | 10 | 4 | 4 | 228 | 14 | 16 | 58 | 284853 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 1 |
620 | 9 | 25 | 3 | 3 | 2 | 228 | 14 | 16 | 58 | 222196 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 3 |
622 | 9 | 12 | 3 | 3 | 2 | 228 | 14 | 16 | 58 | 222196 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 112 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
132 | 27 | 23 | 1 | 5 | 2 | 184 | 42 | 7 | 27 | 308593 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 2 |
137 | 27 | 23 | 2 | 6 | 2 | 184 | 42 | 7 | 27 | 302585 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 1 |
149 | 27 | 23 | 2 | 3 | 2 | 184 | 42 | 7 | 27 | 302585 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 8 |
209 | 27 | 7 | 5 | 4 | 3 | 184 | 42 | 7 | 27 | 378884 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 4 |
269 | 27 | 6 | 8 | 4 | 1 | 184 | 42 | 7 | 27 | 265615 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 8 |
740 rows × 21 columns
Query_string = """ select * from dfpsql order by Age Desc,Service_time Desc;"""
sqldf(Query_string, globals())
ID | Reason_for_absence | Month_of_absence | Day_of_the_week | Seasons | Transportation_expense | Distance_from_Residence_to_Work | Service_time | Age | Work load Average/day | ... | Disciplinary failure | Education | Son | Social_drinker | Social_smoker | Pet | Weight | Height | Body_mass_index | Absenteeism_time_in_hours | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 9 | 18 | 8 | 3 | 1 | 228 | 14 | 16 | 58 | 265615 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 8 |
1 | 9 | 18 | 5 | 4 | 3 | 228 | 14 | 16 | 58 | 246074 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 8 |
2 | 9 | 1 | 10 | 4 | 4 | 228 | 14 | 16 | 58 | 284853 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 1 |
3 | 9 | 25 | 3 | 3 | 2 | 228 | 14 | 16 | 58 | 222196 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 3 |
4 | 9 | 12 | 3 | 3 | 2 | 228 | 14 | 16 | 58 | 222196 | ... | 0 | 1 | 2 | 0 | 0 | 1 | 65 | 172 | 22 | 112 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
735 | 27 | 23 | 1 | 5 | 2 | 184 | 42 | 7 | 27 | 308593 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 2 |
736 | 27 | 23 | 2 | 6 | 2 | 184 | 42 | 7 | 27 | 302585 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 1 |
737 | 27 | 23 | 2 | 3 | 2 | 184 | 42 | 7 | 27 | 302585 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 8 |
738 | 27 | 7 | 5 | 4 | 3 | 184 | 42 | 7 | 27 | 378884 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 4 |
739 | 27 | 6 | 8 | 4 | 1 | 184 | 42 | 7 | 27 | 265615 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 58 | 167 | 21 | 8 |
740 rows × 21 columns
dfp.agg({'Transportation_expense': ['count','min', 'max', 'mean']})
Transportation_expense | |
---|---|
count | 740.00000 |
min | 118.00000 |
max | 388.00000 |
mean | 221.32973 |
Query_string = """ select count(Transportation_expense) as count, min(Transportation_expense) as min, max(Transportation_expense) as max, avg(Transportation_expense) as mean from dfp;"""
sqldf(Query_string, globals())
count | min | max | mean | |
---|---|---|---|---|
0 | 740 | 118 | 388 | 221.32973 |
dfp.groupby('ID')['Service_time'].sum()
ID 1 322 2 72 3 2034 4 13 5 247 6 104 7 84 8 28 9 128 10 72 11 520 12 7 13 180 14 406 15 444 16 48 17 340 18 64 19 36 20 462 21 24 22 414 23 88 24 480 25 80 26 65 27 49 28 684 29 69 30 42 31 27 32 145 33 336 34 550 35 14 36 612 Name: Service_time, dtype: int64
Query_string = """ select ID , sum(Service_time) as Sum_Service_time from dfp
group by ID;"""
sqldf(Query_string, globals())
ID | Sum_Service_time | |
---|---|---|
0 | 1 | 322 |
1 | 2 | 72 |
2 | 3 | 2034 |
3 | 4 | 13 |
4 | 5 | 247 |
5 | 6 | 104 |
6 | 7 | 84 |
7 | 8 | 28 |
8 | 9 | 128 |
9 | 10 | 72 |
10 | 11 | 520 |
11 | 12 | 7 |
12 | 13 | 180 |
13 | 14 | 406 |
14 | 15 | 444 |
15 | 16 | 48 |
16 | 17 | 340 |
17 | 18 | 64 |
18 | 19 | 36 |
19 | 20 | 462 |
20 | 21 | 24 |
21 | 22 | 414 |
22 | 23 | 88 |
23 | 24 | 480 |
24 | 25 | 80 |
25 | 26 | 65 |
26 | 27 | 49 |
27 | 28 | 684 |
28 | 29 | 69 |
29 | 30 | 42 |
30 | 31 | 27 |
31 | 32 | 145 |
32 | 33 | 336 |
33 | 34 | 550 |
34 | 35 | 14 |
35 | 36 | 612 |
dfp.groupby('Reason_for_absence').agg({'Age': ['mean','min','max']})
Age | |||
---|---|---|---|
mean | min | max | |
Reason_for_absence | |||
0 | 39.604651 | 28 | 53 |
1 | 37.687500 | 28 | 58 |
2 | 28.000000 | 28 | 28 |
3 | 40.000000 | 40 | 40 |
4 | 45.000000 | 41 | 49 |
5 | 41.666667 | 37 | 50 |
6 | 38.500000 | 27 | 58 |
7 | 32.866667 | 27 | 46 |
8 | 36.500000 | 28 | 40 |
9 | 36.750000 | 28 | 41 |
10 | 35.960000 | 28 | 49 |
11 | 35.115385 | 28 | 50 |
12 | 36.125000 | 28 | 58 |
13 | 36.490909 | 28 | 50 |
14 | 41.052632 | 28 | 50 |
15 | 39.500000 | 36 | 43 |
16 | 37.333333 | 32 | 40 |
17 | 40.000000 | 40 | 40 |
18 | 37.619048 | 28 | 58 |
19 | 35.450000 | 28 | 50 |
21 | 37.166667 | 30 | 40 |
22 | 32.657895 | 28 | 41 |
23 | 35.610738 | 27 | 50 |
24 | 33.000000 | 33 | 33 |
25 | 36.161290 | 28 | 58 |
26 | 37.848485 | 28 | 50 |
27 | 35.507246 | 28 | 38 |
28 | 37.508929 | 28 | 50 |
Query_string = """ select Reason_for_absence , avg(Age) as mean, min(Age) as min, max(Age) as max from dfp
group by Reason_for_absence;"""
sqldf(Query_string, globals())
Reason_for_absence | mean | min | max | |
---|---|---|---|---|
0 | 0 | 39.604651 | 28 | 53 |
1 | 1 | 37.687500 | 28 | 58 |
2 | 2 | 28.000000 | 28 | 28 |
3 | 3 | 40.000000 | 40 | 40 |
4 | 4 | 45.000000 | 41 | 49 |
5 | 5 | 41.666667 | 37 | 50 |
6 | 6 | 38.500000 | 27 | 58 |
7 | 7 | 32.866667 | 27 | 46 |
8 | 8 | 36.500000 | 28 | 40 |
9 | 9 | 36.750000 | 28 | 41 |
10 | 10 | 35.960000 | 28 | 49 |
11 | 11 | 35.115385 | 28 | 50 |
12 | 12 | 36.125000 | 28 | 58 |
13 | 13 | 36.490909 | 28 | 50 |
14 | 14 | 41.052632 | 28 | 50 |
15 | 15 | 39.500000 | 36 | 43 |
16 | 16 | 37.333333 | 32 | 40 |
17 | 17 | 40.000000 | 40 | 40 |
18 | 18 | 37.619048 | 28 | 58 |
19 | 19 | 35.450000 | 28 | 50 |
20 | 21 | 37.166667 | 30 | 40 |
21 | 22 | 32.657895 | 28 | 41 |
22 | 23 | 35.610738 | 27 | 50 |
23 | 24 | 33.000000 | 33 | 33 |
24 | 25 | 36.161290 | 28 | 58 |
25 | 26 | 37.848485 | 28 | 50 |
26 | 27 | 35.507246 | 28 | 38 |
27 | 28 | 37.508929 | 28 | 50 |
import pandas as pd
data1 = {
'Empid': [1011, 1012, 1013, 1014, 1015],
'Name': ['John', 'Rahul', 'Rick', 'Morty', 'Tim'],
'Designation': ['Manager', 'Research Engineer', ' Research Engineer', 'VP', 'Delivery Manager'],
'Date_of_joining': ['01-Jan-2000', '23-sep-2006', '11-Jan-2012', '21-Jan-1991', '12-Jan-1990']}
Emp_df = pd.DataFrame(data1, columns = ['Empid', 'Name', 'Designation','Date_of_joining'])
Emp_df
Empid | Name | Designation | Date_of_joining | |
---|---|---|---|---|
0 | 1011 | John | Manager | 01-Jan-2000 |
1 | 1012 | Rahul | Research Engineer | 23-sep-2006 |
2 | 1013 | Rick | Research Engineer | 11-Jan-2012 |
3 | 1014 | Morty | VP | 21-Jan-1991 |
4 | 1015 | Tim | Delivery Manager | 12-Jan-1990 |
data2 = {
'Empid': [1011, 1017, 1013, 1019, 1015],
'Deptartment': ['Management', 'Research', 'Research', 'Management', 'Delivery'],
'Total_Experience': [18, 10, 10, 28, 22]}
Dept_df = pd.DataFrame(data2, columns = ['Empid', 'Deptartment', 'Total_Experience'])
Dept_df
Empid | Deptartment | Total_Experience | |
---|---|---|---|
0 | 1011 | Management | 18 |
1 | 1017 | Research | 10 |
2 | 1013 | Research | 10 |
3 | 1019 | Management | 28 |
4 | 1015 | Delivery | 22 |
# Inner Join
pd.merge(Emp_df, Dept_df, left_on='Empid',right_on='Empid', how='inner')
Empid | Name | Designation | Date_of_joining | Deptartment | Total_Experience | |
---|---|---|---|---|---|---|
0 | 1011 | John | Manager | 01-Jan-2000 | Management | 18 |
1 | 1013 | Rick | Research Engineer | 11-Jan-2012 | Research | 10 |
2 | 1015 | Tim | Delivery Manager | 12-Jan-1990 | Delivery | 22 |
# Inner Join
Query_string = """ select * from Emp_df a INNER JOIN Dept_df b ON a.Empid = b.Empid;"""
sqldf(Query_string, globals())
Empid | Name | Designation | Date_of_joining | Empid | Deptartment | Total_Experience | |
---|---|---|---|---|---|---|---|
0 | 1011 | John | Manager | 01-Jan-2000 | 1011 | Management | 18 |
1 | 1013 | Rick | Research Engineer | 11-Jan-2012 | 1013 | Research | 10 |
2 | 1015 | Tim | Delivery Manager | 12-Jan-1990 | 1015 | Delivery | 22 |
# Left Join
pd.merge(Emp_df, Dept_df,left_on='Empid',right_on='Empid', how='left')
Empid | Name | Designation | Date_of_joining | Deptartment | Total_Experience | |
---|---|---|---|---|---|---|
0 | 1011 | John | Manager | 01-Jan-2000 | Management | 18.0 |
1 | 1012 | Rahul | Research Engineer | 23-sep-2006 | NaN | NaN |
2 | 1013 | Rick | Research Engineer | 11-Jan-2012 | Research | 10.0 |
3 | 1014 | Morty | VP | 21-Jan-1991 | NaN | NaN |
4 | 1015 | Tim | Delivery Manager | 12-Jan-1990 | Delivery | 22.0 |
# Left Join
Query_string = """ select * from Emp_df a LEFT JOIN Dept_df b ON a.Empid = b.Empid;"""
sqldf(Query_string, globals())
Empid | Name | Designation | Date_of_joining | Empid | Deptartment | Total_Experience | |
---|---|---|---|---|---|---|---|
0 | 1011 | John | Manager | 01-Jan-2000 | 1011.0 | Management | 18.0 |
1 | 1012 | Rahul | Research Engineer | 23-sep-2006 | NaN | None | NaN |
2 | 1013 | Rick | Research Engineer | 11-Jan-2012 | 1013.0 | Research | 10.0 |
3 | 1014 | Morty | VP | 21-Jan-1991 | NaN | None | NaN |
4 | 1015 | Tim | Delivery Manager | 12-Jan-1990 | 1015.0 | Delivery | 22.0 |
# Right Join
pd.merge(Emp_df, Dept_df,left_on='Empid',right_on='Empid', how='right')
Empid | Name | Designation | Date_of_joining | Deptartment | Total_Experience | |
---|---|---|---|---|---|---|
0 | 1011 | John | Manager | 01-Jan-2000 | Management | 18 |
1 | 1017 | NaN | NaN | NaN | Research | 10 |
2 | 1013 | Rick | Research Engineer | 11-Jan-2012 | Research | 10 |
3 | 1019 | NaN | NaN | NaN | Management | 28 |
4 | 1015 | Tim | Delivery Manager | 12-Jan-1990 | Delivery | 22 |
# Right Join
Query_string = """ select a.Empid,Name,Designation,Date_of_joining,Deptartment,Total_Experience from Dept_df a LEFT JOIN Emp_df b ON a.Empid = b.Empid;"""
sqldf(Query_string, globals())
Empid | Name | Designation | Date_of_joining | Deptartment | Total_Experience | |
---|---|---|---|---|---|---|
0 | 1011 | John | Manager | 01-Jan-2000 | Management | 18 |
1 | 1017 | None | None | None | Research | 10 |
2 | 1013 | Rick | Research Engineer | 11-Jan-2012 | Research | 10 |
3 | 1019 | None | None | None | Management | 28 |
4 | 1015 | Tim | Delivery Manager | 12-Jan-1990 | Delivery | 22 |
# Outer Join
pd.merge(Emp_df, Dept_df,left_on='Empid',right_on='Empid', how='outer')
Empid | Name | Designation | Date_of_joining | Deptartment | Total_Experience | |
---|---|---|---|---|---|---|
0 | 1011 | John | Manager | 01-Jan-2000 | Management | 18.0 |
1 | 1012 | Rahul | Research Engineer | 23-sep-2006 | NaN | NaN |
2 | 1013 | Rick | Research Engineer | 11-Jan-2012 | Research | 10.0 |
3 | 1014 | Morty | VP | 21-Jan-1991 | NaN | NaN |
4 | 1015 | Tim | Delivery Manager | 12-Jan-1990 | Delivery | 22.0 |
5 | 1017 | NaN | NaN | NaN | Research | 10.0 |
6 | 1019 | NaN | NaN | NaN | Management | 28.0 |
# OUTER join
Query_string = """ select * from Emp_df a left OUTER JOIN Dept_df b ON a.Empid = b.Empid;"""
sqldf(Query_string, globals())
Empid | Name | Designation | Date_of_joining | Empid | Deptartment | Total_Experience | |
---|---|---|---|---|---|---|---|
0 | 1011 | John | Manager | 01-Jan-2000 | 1011.0 | Management | 18.0 |
1 | 1012 | Rahul | Research Engineer | 23-sep-2006 | NaN | None | NaN |
2 | 1013 | Rick | Research Engineer | 11-Jan-2012 | 1013.0 | Research | 10.0 |
3 | 1014 | Morty | VP | 21-Jan-1991 | NaN | None | NaN |
4 | 1015 | Tim | Delivery Manager | 12-Jan-1990 | 1015.0 | Delivery | 22.0 |
#to install sqlite3 in Anaconda
pip install sqlite3
conda install sqlite3
import sqlite3
con = sqlite3.connect('school.sqlite')
cur = con.cursor()
q = 'select * from faculty'
cur.execute(q)
for row in cur.fetchall():
print(row)
('98005', 'Suzanne', 'Viescas', '15127 NE 24th, #383', 'Redmond', 'WA', '98052', '425', '555-2686', '44000', '1986-05-31 00:00:00', 'Instructor', 'Full time', 'True') ('98007', 'Gary', 'Hallmark', 'Route 2, Box 203B', 'Auburn', 'WA', '98002', '253', '555-2676', '53000', '1985-01-21 00:00:00', 'Associate Professor', 'Full time', 'True') ('98010', 'Jeffrey', 'Smith', '30301 - 166th Ave. N.E.', 'Fremont', 'CA', '94538', '510', '555-2596', '52000', '1983-10-06 00:00:00', 'Associate Professor', 'On leave', 'True') ('98011', 'Ann', 'Patterson', '16 Maple Lane', 'Auburn', 'WA', '98002', '253', '555-2591', '45000', '1983-10-16 00:00:00', 'Instructor', 'Full time', 'True') ('98012', 'Michael', 'Davolio', '672 Lamont Ave', 'Houston', 'TX', '77201', '713', '555-2491', '49000', '1989-02-09 00:00:00', 'Instructor', 'Full time', 'True') ('98013', 'Ann', 'Fuller', '908 W. Capital Way', 'Tacoma', 'WA', '98413', '253', '555-2496', '44000', '1986-07-05 00:00:00', 'Instructor', 'Full time', 'True') ('98014', 'James', 'Leverling', '722 Moss Bay Blvd.', 'Kirkland', 'WA', '98033', '425', '555-2501', '60000', '1986-07-16 00:00:00', 'Professor', 'Full time', 'True') ('98019', 'Laura', 'Callahan', '901 Pine Avenue', 'Portland', 'OR', '97208', '503', '555-2526', '45000', '1989-11-02 00:00:00', 'Instructor', 'Full time', 'True') ('98020', 'Albert', 'Buchanan', '13920 S.E. 40th Street', 'Bellevue', 'WA', '98009', '425', '555-2531', '45000', '1985-08-02 00:00:00', 'Instructor', 'Full time', 'True') ('98025', 'Janet', 'Leverling', '722 Moss Bay Blvd.', 'Kirkland', 'WA', '98033', '425', '555-2576', '50000', '1984-04-12 00:00:00', 'Associate Professor', 'Full time', 'True') ('98028', 'Alaina', 'Hallmark', 'Route 2, Box 203B', 'Woodinville', 'WA', '98072', '425', '555-2631', '57000', '1984-01-17 00:00:00', 'Professor', 'Full time', 'True') ('98030', 'Amelia', 'Buchanan', '13920 S.E. 40th Street', 'Bellevue', 'WA', '98006', '425', '555-2556', '48000', '1988-05-31 00:00:00', 'Instructor', 'Full time', 'True') ('98036', 'John', 'Leverling', '611 Alpine Drive', 'Palm Springs', 'CA', '92263', '760', '555-2611', '60000', '1982-11-20 00:00:00', 'Professor', 'Full time', 'True') ('98040', 'David', 'Callahan', '101 NE 88th', 'Salem', 'OR', '97301', '503', '555-2636', '50000', '1987-01-13 00:00:00', 'Associate Professor', 'Full time', 'True') ('98042', 'David', 'Smith', '311 20th Ave. N.E.', 'Fremont', 'CA', '94538', '510', '555-2646', '52000', '1991-12-17 00:00:00', 'Associate Professor', 'Full time', 'True') ('98045', 'Michael', 'Hernandez', 'PO Box 223311', 'Tacoma', 'WA', '98413', '253', '555-2711', '60000', '1990-08-20 00:00:00', 'Professor', 'Full time', 'True') ('98048', 'Joyce', 'Bonnicksen', '2424 Thames Drive', 'Bellevue', 'WA', '98006', '425', '555-2726', '60000', '1986-03-02 00:00:00', 'Professor', 'Full time', 'True') ('98052', 'Katherine', 'Ehrlich', '777 Fenexet Blvd', 'Redmond', 'WA', '98052', '425', '555-0399', '45000', '1985-03-08 00:00:00', 'Instructor', 'Part time', 'False') ('98053', 'Gregory', 'Piercy', '4501 Wetland Road', 'Long Beach', 'CA', '90809', '562', '555-0037', '45000', '1992-02-10 00:00:00', 'Instructor', 'Full time', 'True') ('98055', 'Alastair', 'Black', '3887 Easy Street', 'Seattle', 'WA', '98125', '206', '555-0039', '60000', '1988-12-11 00:00:00', 'Professor', 'Full time', 'True') ('98059', 'Consuelo', 'Maynez', '3445 Cheyenne Road', 'El Paso', 'TX', '79993', '915', '555-2291', '48000', '1986-09-17 00:00:00', 'Instructor', 'Full time', 'True') ('98062', 'Caroline', 'Coie', '298 Forest Lane', 'Seattle', 'WA', '98125', '206', '555-2306', '52000', '1983-01-28 00:00:00', 'Associate Professor', 'Full time', 'False') ('98063', 'Ryan', 'Ehrlich', '455 West Palm Ave', 'San Antonio', 'TX', '78284', '210', '555-2311', '45000', '1988-03-02 00:00:00', 'Instructor', 'Full time', 'True') ('98064', 'Allan', 'Davis', '877 145th Ave SE', 'Portland', 'OR', '97208', '503', '555-2316', '56000', '1989-08-20 00:00:00', 'Professor', 'Full time', 'True')
import pandas as pd
con = sqlite3.connect('school.sqlite')
def x(q):
return pd.read_sql_query(q, con)
x('select * from faculty Limit 6')
FacultyID | FacFirstName | FacLastname | FacStreetAddress | FacCity | FacState | FacZipCode | FacAreaCode | FacPhoneNumber | Salary | DateHired | Title | Status | Tenured | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 98005 | Suzanne | Viescas | 15127 NE 24th, #383 | Redmond | WA | 98052 | 425 | 555-2686 | 44000 | 1986-05-31 00:00:00 | Instructor | Full time | True |
1 | 98007 | Gary | Hallmark | Route 2, Box 203B | Auburn | WA | 98002 | 253 | 555-2676 | 53000 | 1985-01-21 00:00:00 | Associate Professor | Full time | True |
2 | 98010 | Jeffrey | Smith | 30301 - 166th Ave. N.E. | Fremont | CA | 94538 | 510 | 555-2596 | 52000 | 1983-10-06 00:00:00 | Associate Professor | On leave | True |
3 | 98011 | Ann | Patterson | 16 Maple Lane | Auburn | WA | 98002 | 253 | 555-2591 | 45000 | 1983-10-16 00:00:00 | Instructor | Full time | True |
4 | 98012 | Michael | Davolio | 672 Lamont Ave | Houston | TX | 77201 | 713 | 555-2491 | 49000 | 1989-02-09 00:00:00 | Instructor | Full time | True |
5 | 98013 | Ann | Fuller | 908 W. Capital Way | Tacoma | WA | 98413 | 253 | 555-2496 | 44000 | 1986-07-05 00:00:00 | Instructor | Full time | True |
q = 'select count(title) from faculty'
x(q)
count(title) | |
---|---|
0 | 24 |
q = 'select count(distinct title) from faculty'
x(q)
count(distinct title) | |
---|---|
0 | 3 |
q = 'select count(distinct title) as ranks from faculty'
x(q)
ranks | |
---|---|
0 | 3 |
x('select avg(salary) from faculty')
avg(salary) | |
---|---|
0 | 51041.666667 |
x('select * from student_schedules')
StudentID | SectionNumber | Grade | |
---|---|---|---|
0 | 1001 | 1560 | 93.28 |
1 | 1001 | 2071 | 66.41 |
2 | 1001 | 4055 | 62.85 |
3 | 1001 | 4760 | 73.21 |
4 | 1001 | 4812 | 66.91 |
... | ... | ... | ... |
84 | 1017 | 4768 | 85.26 |
85 | 1018 | 1031 | 87.05 |
86 | 1018 | 1180 | 69.88 |
87 | 1018 | 1502 | 67.13 |
88 | 1018 | 4768 | 81.23 |
89 rows × 3 columns
x('select * from class_sections')
SectionNumber | CourseID | RoomNumber | Credits | StartTime | Duration | MondaySchedule | TuesdaySchedule | WednesdaySchedule | ThursdaySchedule | FridaySchedule | SaturdaySchedule | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1000 | 11 | 1231 | 5 | 1899-12-30 10:00:00 | 50 | True | False | True | False | True | False |
1 | 1002 | 12 | 1619 | 4 | 1899-12-30 15:30:00 | 110 | True | False | True | False | True | False |
2 | 1004 | 13 | 1627 | 4 | 1899-12-30 08:00:00 | 50 | True | False | True | False | True | False |
3 | 1006 | 13 | 1627 | 4 | 1899-12-30 09:00:00 | 110 | True | False | True | False | True | False |
4 | 1012 | 14 | 1627 | 4 | 1899-12-30 13:00:00 | 170 | False | True | False | True | False | False |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
71 | 4810 | 23 | 3319 | 5 | 1899-12-30 08:00:00 | 50 | False | True | False | True | False | False |
72 | 4812 | 24 | 3330 | 3 | 1899-12-30 12:00:00 | 80 | False | True | False | True | False | False |
73 | 4813 | 24 | 3406 | 3 | 1899-12-30 10:00:00 | 50 | True | False | True | False | True | False |
74 | 4825 | 25 | 3319 | 5 | 1899-12-30 11:00:00 | 50 | True | True | True | True | True | False |
75 | 4880 | 26 | 1231 | 5 | 1899-12-30 13:30:00 | 110 | True | False | True | False | True | False |
76 rows × 12 columns
x('select * from courses')
CourseID | CategoryID | CourseCode | CourseName | CourseDescription | |
---|---|---|---|---|---|
0 | 1 | ACC | ACC 210 | Financial Accounting Fundamentals I | Introduces basic accounting concepts, principl... |
1 | 2 | ACC | ACC 220 | Financial Accounting Fundamentals II | Applications of basic accounting concepts, pri... |
2 | 3 | ACC | ACC 230 | Fundamentals of Managerial Accounting | Analysis of accounting data as part of the man... |
3 | 4 | ACC | ACC 251 | Intermediate Accounting | In-depth review of financial accounting princi... |
4 | 5 | ACC | ACC 257 | Business Tax Accounting | Basic principles, practices and governmental r... |
5 | 6 | BUS | BUS 101 | Introduction to Business | Survey of businss practices. Covers business t... |
6 | 7 | BUS | BUS 155 | Developing A Feasibility Plan | With the aid of a counselor, a feasibility pla... |
7 | 8 | BUS | BUS 151 | Introduction to Enterpreneurship | Overview of the entrepreneurial process, exami... |
8 | 9 | BUS | BUS 170 | Information Technology I | Uses Word for Windows word processing skills, ... |
9 | 10 | BUS | BUS 171 | Information Technology II | Uses intermediate Word features including form... |
10 | 11 | ART | ART 100 | Introduction to Art | Historical backgrounds and design fundamentals... |
11 | 12 | ART | ART 101 | Design | Studio sudies in the fundamentals of two-dimen... |
12 | 13 | ART | ART 111 | Drawing | Study of line, value, space, perspective, and ... |
13 | 14 | ART | ART 201 | Painting | Beginning painting in oil or synthetic media u... |
14 | 15 | ART | ART 210 | Computer Art | Explore the elements of art such as line, valu... |
15 | 16 | ART | ART 251 | Art History | Surveys major forms of visual expression from ... |
16 | 17 | BIO | BIO 100 | Biological Principles | An introductory biology course with lab for th... |
17 | 18 | BIO | BIO 101 | General Biology | Basic bilogical concepts with emphasis on gene... |
18 | 19 | BIO | BIO 280 | Microbiology | Introduction to micro-organisms including micr... |
19 | 20 | CHE | CHE 101 | Chemistry | General chemistry for non-science majors. Comp... |
20 | 21 | CHE | CHE 139 | Fundamentals of Chemistry | Prepatory for the science major chemistry cour... |
21 | 22 | CHE | CHE 231 | Organic Chemistry | Structure, nomenclature, reactions, and synthe... |
22 | 23 | CIS | CIS 101 | Microcomputer Applications | This is a "hands-on" course. Students will lea... |
23 | 24 | CIS | CIS 102 | Information Systems Concepts | Provides a broad introduction to computers and... |
24 | 25 | CIS | CIS 114 | Problem Solving and Structured Programming | Covers design, documentation, and coding of pr... |
25 | 26 | CIS | CIS 236 | Database Management | Includes database concepts, data management te... |
26 | 27 | CSC | CSC 110 | Programming in BASIC | Computer programming and program design using ... |
27 | 28 | CSC | CIS 142 | Computer Programming | Introduction to computer science using the C p... |
28 | 29 | JRN | JRN 104 | College Publications | Hands-on course in college publishing. Covers ... |
29 | 30 | ECO | ECO 100 | Survey of Economics | Economics applied to various comtemporary soci... |
30 | 31 | ECO | ECO 200 | Principles of Economics: Microeconomics | Covers resource allocation and income distribu... |
31 | 32 | ECO | ECO 201 | Principles of Economics: Macroeconomics | Analysis of the aggregate economy: GDP, inflat... |
32 | 33 | MUS | MUS 100 | Music in the Western World | An introduction to music. Features music from ... |
33 | 34 | MUS | MUS 101 | First Year Theory and Ear Training | Rudiments of music - notation, scales, interva... |
34 | 35 | MUS | MUS 201 | Second Year Music Theory | Continuation of MUS 101. Chromatic harmony, mo... |
35 | 36 | MUS | MUS 204 | History of Jazz | Traces the roots of jazz in America from New O... |
36 | 37 | ENG | ENG 101 | Composition - Fundamentals | Introduces the nature of the writing process i... |
37 | 38 | ENG | ENG 102 | Composition - Intermediate | Continues instruction on the writing process, ... |
38 | 39 | ENG | ENG 104 | Advanced English Grammar | Study of the grammar and rhetoric of the Engli... |
39 | 40 | GEG | GEG 100 | Introduction to Geography | An introduction to the major cultures of the w... |
40 | 41 | GEG | GEG 205 | Physical Geography | Study of the Earth, the materials that make it... |
41 | 42 | HIS | HIS 101 | World History to 1500 | Historic foundations and development of the gr... |
42 | 43 | HIS | HIS 111 | U.S. History to 1877 | American history from the colonial period thro... |
43 | 44 | HIS | HIS 112 | U.S. History Since 1865 | Includes Reconstruction, industrialization, ur... |
44 | 45 | MAT | MAT 080 | Preparatory Mathematics | Individualized instruction in Arithmetic, Alge... |
45 | 46 | MAT | MAT 097 | Elementary Algebra | First course in Algebra includes signed number... |
46 | 47 | MAT | MAT 098 | Intermediate Algebra | Sets and the real number system, polynomial an... |
47 | 48 | MAT | MAT 103 | Geometry and Visualization | Basic plane geometry concepts, emphasizing pro... |
48 | 49 | MAT | MAT 104 | Trigonometry | Elementary plane goemetry, right triangle tirg... |
49 | 50 | PHY | PHY 100 | Survey Of Physics | Basic laws of phyics from the laws of motion t... |
50 | 51 | PHY | PHY 101 | General Physics | Classical mechanics; kinematics and dynamics. ... |
51 | 52 | PHY | PHY 201 | Engineering Physics I | Development of the basic principles of classic... |
52 | 53 | PHY | PHY 203 | Engineering Physics II | Waves and oscillations. Mechanical waves and s... |
53 | 54 | POL | POL 101 | Introduction to Political Science | Introduction to theory, organization, politics... |
54 | 55 | POL | POL 102 | American Government | Origin and development of the U.S. government.... |
55 | 56 | POL | POL 213 | Women and Politics | Introduction to concepts of power and policy i... |
#available tables are buildings, class_rooms, class_sections, course_categories, courses, departments,
#faculty, faculty_categories, faculty_courses, faculty_sections, student_schedules, students
q = 'select * from class_sections natural join courses where categoryid = "MAT"'
x(q)
SectionNumber | CourseID | RoomNumber | Credits | StartTime | Duration | MondaySchedule | TuesdaySchedule | WednesdaySchedule | ThursdaySchedule | FridaySchedule | SaturdaySchedule | CategoryID | CourseCode | CourseName | CourseDescription | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2889 | 45 | 2423 | 5 | 1899-12-30 09:00:00 | 50 | True | True | True | True | True | False | MAT | MAT 080 | Preparatory Mathematics | Individualized instruction in Arithmetic, Alge... |
1 | 2891 | 45 | 2423 | 5 | 1899-12-30 11:00:00 | 50 | True | True | True | True | True | False | MAT | MAT 080 | Preparatory Mathematics | Individualized instruction in Arithmetic, Alge... |
2 | 2895 | 45 | 2423 | 5 | 1899-12-30 13:00:00 | 50 | True | True | True | True | True | False | MAT | MAT 080 | Preparatory Mathematics | Individualized instruction in Arithmetic, Alge... |
3 | 2907 | 46 | 3445 | 5 | 1899-12-30 08:00:00 | 50 | True | True | True | True | True | False | MAT | MAT 097 | Elementary Algebra | First course in Algebra includes signed number... |
4 | 2911 | 46 | 3445 | 5 | 1899-12-30 12:00:00 | 50 | True | True | True | True | True | False | MAT | MAT 097 | Elementary Algebra | First course in Algebra includes signed number... |
5 | 2915 | 46 | 3353 | 5 | 1899-12-30 10:00:00 | 50 | True | True | True | True | True | False | MAT | MAT 097 | Elementary Algebra | First course in Algebra includes signed number... |
6 | 2917 | 47 | 3422 | 5 | 1899-12-30 14:00:00 | 50 | True | True | True | True | True | False | MAT | MAT 098 | Intermediate Algebra | Sets and the real number system, polynomial an... |
7 | 2925 | 47 | 3422 | 5 | 1899-12-30 15:00:00 | 50 | True | False | True | False | True | False | MAT | MAT 098 | Intermediate Algebra | Sets and the real number system, polynomial an... |
8 | 2933 | 47 | 3422 | 5 | 1899-12-30 09:00:00 | 240 | False | False | False | False | False | True | MAT | MAT 098 | Intermediate Algebra | Sets and the real number system, polynomial an... |
9 | 2633 | 48 | 3420 | 5 | 1899-12-30 16:00:00 | 50 | False | True | False | True | False | False | MAT | MAT 103 | Geometry and Visualization | Basic plane geometry concepts, emphasizing pro... |
10 | 2639 | 49 | 3353 | 3 | 1899-12-30 09:00:00 | 50 | True | False | True | False | True | False | MAT | MAT 104 | Trigonometry | Elementary plane goemetry, right triangle tirg... |
11 | 2647 | 49 | 3353 | 3 | 1899-12-30 18:00:00 | 50 | False | True | False | True | False | False | MAT | MAT 104 | Trigonometry | Elementary plane goemetry, right triangle tirg... |
q = 'select * from class_sections natural join courses'
x(q)
SectionNumber | CourseID | RoomNumber | Credits | StartTime | Duration | MondaySchedule | TuesdaySchedule | WednesdaySchedule | ThursdaySchedule | FridaySchedule | SaturdaySchedule | CategoryID | CourseCode | CourseName | CourseDescription | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1000 | 11 | 1231 | 5 | 1899-12-30 10:00:00 | 50 | True | False | True | False | True | False | ART | ART 100 | Introduction to Art | Historical backgrounds and design fundamentals... |
1 | 1002 | 12 | 1619 | 4 | 1899-12-30 15:30:00 | 110 | True | False | True | False | True | False | ART | ART 101 | Design | Studio sudies in the fundamentals of two-dimen... |
2 | 1004 | 13 | 1627 | 4 | 1899-12-30 08:00:00 | 50 | True | False | True | False | True | False | ART | ART 111 | Drawing | Study of line, value, space, perspective, and ... |
3 | 1006 | 13 | 1627 | 4 | 1899-12-30 09:00:00 | 110 | True | False | True | False | True | False | ART | ART 111 | Drawing | Study of line, value, space, perspective, and ... |
4 | 1012 | 14 | 1627 | 4 | 1899-12-30 13:00:00 | 170 | False | True | False | True | False | False | ART | ART 201 | Painting | Beginning painting in oil or synthetic media u... |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
71 | 4810 | 23 | 3319 | 5 | 1899-12-30 08:00:00 | 50 | False | True | False | True | False | False | CIS | CIS 101 | Microcomputer Applications | This is a "hands-on" course. Students will lea... |
72 | 4812 | 24 | 3330 | 3 | 1899-12-30 12:00:00 | 80 | False | True | False | True | False | False | CIS | CIS 102 | Information Systems Concepts | Provides a broad introduction to computers and... |
73 | 4813 | 24 | 3406 | 3 | 1899-12-30 10:00:00 | 50 | True | False | True | False | True | False | CIS | CIS 102 | Information Systems Concepts | Provides a broad introduction to computers and... |
74 | 4825 | 25 | 3319 | 5 | 1899-12-30 11:00:00 | 50 | True | True | True | True | True | False | CIS | CIS 114 | Problem Solving and Structured Programming | Covers design, documentation, and coding of pr... |
75 | 4880 | 26 | 1231 | 5 | 1899-12-30 13:30:00 | 110 | True | False | True | False | True | False | CIS | CIS 236 | Database Management | Includes database concepts, data management te... |
76 rows × 16 columns
q = 'select avg(credits), avg(duration) from class_sections natural join courses where categoryid = "MAT"'
x(q)
avg(credits) | avg(duration) | |
---|---|---|
0 | 4.666667 | 65.833333 |
q = 'select avg(credits), avg(duration) from class_sections natural join courses where categoryid = "ART"'
x(q)
avg(credits) | avg(duration) | |
---|---|---|
0 | 4.375 | 87.5 |
q='select categoryid, avg(credits) as mean_credits from class_sections natural join courses group by categoryid'
df=x(q)
df
CategoryID | mean_credits | |
---|---|---|
0 | ACC | 5.000000 |
1 | ART | 4.375000 |
2 | BIO | 5.000000 |
3 | BUS | 4.000000 |
4 | CHE | 4.750000 |
5 | CIS | 4.333333 |
6 | CSC | 5.000000 |
7 | ECO | 5.000000 |
8 | ENG | 5.000000 |
9 | GEG | 5.000000 |
10 | HIS | 5.000000 |
11 | JRN | 2.000000 |
12 | MAT | 4.666667 |
13 | MUS | 4.500000 |
14 | PHY | 5.000000 |
15 | POL | 5.000000 |
import seaborn as sns
sns.catplot(y='CategoryID', x='mean_credits', data=df, kind='bar')
<seaborn.axisgrid.FacetGrid at 0x231c2102700>
q = 'select * from faculty_courses'
df = x(q)
df
FacultyID | CourseID | ProficiencyRating | |
---|---|---|---|
0 | 98005 | 12 | 10 |
1 | 98005 | 16 | 10 |
2 | 98005 | 34 | 9 |
3 | 98005 | 36 | 8 |
4 | 98005 | 38 | 8 |
... | ... | ... | ... |
106 | 98063 | 53 | 8 |
107 | 98064 | 29 | 9 |
108 | 98064 | 37 | 8 |
109 | 98064 | 40 | 9 |
110 | 98064 | 41 | 8 |
111 rows × 3 columns
#save graph as an image file
#import matplotlib.pylot as plt
plt.savefig('filename.png')
#changing the file extension will change the format of the file save