import pandas as pd
# Quick load: with no sheet argument, read_excel returns the first sheet.
mba1 = pd.read_excel(io='BW_MBA_data.xlsx')
mba1.head(n=5)

# Open the workbook as an ExcelFile so individual sheets can be parsed by
# name; `mba` is reused further down to parse another sheet.
mba = pd.ExcelFile('BW_MBA_data.xlsx')
mba_data = mba.parse(sheet_name='MBA Data')
mba_data.head(n=5)
| | Fulltime Business Week Ranking | School Name | State | Type | Enrollment | Avg GMAT | Resident Tuition, Fees | Pct International | Pct Female | Pct Asian American | Pct Minority | Pct with job offers | Avg starting base salary |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | University of Chicago | Illinois | Private | 1144 | 713.0 | 97165.0 | 35.0 | 35.0 | 16.0 | 7.0 | 92.0 | 107091.0 |
1 | 2 | Harvard University | Massachusetts | Private | 1801 | 720.0 | 101660.0 | 33.0 | 38.0 | NaN | NaN | 94.0 | 124378.0 |
2 | 3 | Northwestern University | Illinois | Private | 1200 | 711.0 | 93918.0 | 34.0 | 36.0 | 25.0 | 13.0 | 95.0 | 108064.0 |
3 | 4 | University of Pennsylvania | Pennsylvania | Private | 1651 | 714.0 | 104410.0 | 44.0 | 36.0 | 7.8 | 9.0 | 89.0 | 112186.0 |
4 | 5 | University of Michigan | Michigan | Public | 898 | 706.0 | 80879.0 | 27.0 | 34.0 | 21.0 | 13.0 | 89.0 | 103608.0 |
# Parse the undergraduate sheet from the already-opened workbook `mba`.
ug_data = mba.parse(sheet_name='Undergraduate Data')
ug_data.head(n=5)
| | 2009 Rank | 2008 Rank | School Name | Location | Type | Program Length | Annual Cost | Fulltime enrollment | Student Rank | Recruiter Rank | Median Starting Salary | MBA Feeder Rank | Academic Quality Rank | Faculty Student Ratio | Average SAT Score | Average ACT Score | Teaching Quality Grade | Facilities & Service Grade | Job Placement Grade |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 2.0 | Virginia (McIntire) | Charlottesville | Public | 2 | 9490 | 655 | 1 | 52 | 58000 | 5 | 5 | 10.40 | 1355 | 30 | A+ | A+ | A+ |
1 | 2 | 3.0 | Notre Dame (Mendoza) | South Bend, Ind. | Private | 3 | 36847 | 1669 | 2 | 12 | 55000 | 11 | 16 | 18.57 | 1405 | 32 | A+ | A+ | A+ |
2 | 3 | 1.0 | Pennsylvania (Wharton) | Philadelphia | Private | 4 | 37526 | 2528 | 13 | 13 | 61001 | 10 | 1 | 10.89 | 1440 | 32 | A+ | A | A+ |
3 | 4 | 6.0 | Michigan (Ross) | Ann Arbor | Public | 3 | 10848 | 1050 | 18 | 8 | 60000 | 7 | 8 | 15.22 | 1346 | 30 | B | A | A+ |
4 | 5 | 7.0 | Brigham Young (Marriott) | Provo, Utah | Private | 2 | 4110 | 1783 | 6 | 1 | 50000 | 17 | 40 | 19.00 | 1231 | 27 | A | A+ | A+ |
# Load the annual enterprise survey CSV with pandas' default parsing options.
# NOTE(review): the Value column displays with thousands separators
# (e.g. 728,239), so it is presumably read as text rather than numbers --
# confirm, and consider thousands=',' if numeric values are needed.
NZStats = pd.read_csv(
    filepath_or_buffer='annual-enterprise-survey-2019-financial-year-provisional-csv.csv',
)
NZStats.head(n=5)
| | Year | Industry_aggregation_NZSIOC | Industry_code_NZSIOC | Industry_name_NZSIOC | Units | Variable_code | Variable_name | Variable_category | Value | Industry_code_ANZSIC06 |
---|---|---|---|---|---|---|---|---|---|---|
0 | 2019 | Level 1 | 99999 | All industries | Dollars (millions) | H01 | Total income | Financial performance | 728,239 | ANZSIC06 divisions A-S (excluding classes K633... |
1 | 2019 | Level 1 | 99999 | All industries | Dollars (millions) | H04 | Sales, government funding, grants and subsidies | Financial performance | 643,809 | ANZSIC06 divisions A-S (excluding classes K633... |
2 | 2019 | Level 1 | 99999 | All industries | Dollars (millions) | H05 | Interest, dividends and donations | Financial performance | 62,924 | ANZSIC06 divisions A-S (excluding classes K633... |
3 | 2019 | Level 1 | 99999 | All industries | Dollars (millions) | H07 | Non-operating income | Financial performance | 21,505 | ANZSIC06 divisions A-S (excluding classes K633... |
4 | 2019 | Level 1 | 99999 | All industries | Dollars (millions) | H08 | Total expenditure | Financial performance | 634,710 | ANZSIC06 divisions A-S (excluding classes K633... |
# Read the air-quality text file as comma-delimited data.
# For a tab-delimited file, pass sep='\t' instead.
air_quality = pd.read_csv(
    filepath_or_buffer='air_quality_LA.txt',
    sep=',',
)
air_quality.head(n=5)
| | Time | Max Air Quality Index Los Angeles California Max AQI\n |
---|---|---|
0 | 20090507 | 47 |
1 | 20090501 | 53 |
2 | 20090425 | 36 |
3 | 20090419 | 36 |
4 | 20090413 | 55 |
# Load the employee records; `employees` feeds the grouping and plotting
# steps that follow.
employees = pd.read_excel(io='employee_data.xlsx')
employees.head(n=5)
| | Employee | Gender | Age | Prior Experience | Beta Experience | Education | Annual Salary |
---|---|---|---|---|---|---|---|
0 | 1 | 1 | 39 | 5 | 12 | 4 | 57700 |
1 | 2 | 0 | 44 | 12 | 8 | 6 | 76400 |
2 | 3 | 0 | 24 | 0 | 2 | 4 | 44000 |
3 | 4 | 1 | 25 | 2 | 1 | 4 | 41600 |
4 | 5 | 0 | 56 | 5 | 25 | 8 | 163900 |
# Average every remaining column within each (Gender, Education) pair.
# as_index=False keeps the grouping keys as ordinary columns.
groups = ['Gender', 'Education']
by_sex_edu = employees.groupby(by=groups, as_index=False).mean()

# Show only the keys and the mean age, rounded to two decimals.
by_sex_edu[['Gender', 'Education', 'Age']].round(decimals=2)
| | Gender | Education | Age |
---|---|---|---|
0 | 0 | 0 | 33.50 |
1 | 0 | 2 | 37.22 |
2 | 0 | 4 | 37.22 |
3 | 0 | 6 | 40.75 |
4 | 0 | 8 | 45.50 |
5 | 1 | 0 | 30.38 |
6 | 1 | 2 | 39.70 |
7 | 1 | 4 | 42.72 |
8 | 1 | 6 | 38.26 |
9 | 1 | 8 | 35.40 |
import seaborn as sns
import matplotlib.pyplot as plt
# Bar plot of Age by Gender (seaborn aggregates Age within each Gender).
sns.barplot(data=employees, x='Gender', y='Age')
plt.show()

# Number of rows per Gender, with seaborn's default styling.
sns.countplot(data=employees, x='Gender')
plt.show()

# Same count plot, restyled: white grid theme, explicit title and y-label,
# and spines removed via despine().
sns.set_style(style='whitegrid')
sns.countplot(data=employees, x='Gender')
plt.gca().set(title='Count of Gender', ylabel='Count')
sns.despine()
plt.show()
# Salary against age, every point drawn in red.
plt.scatter(
    x=employees['Age'],
    y=employees['Annual Salary'],
    color='red',
)
plt.show()

# Same scatter, with each point's colour taken from its Gender value.
plt.scatter(
    x=employees['Age'],
    y=employees['Annual Salary'],
    c=employees['Gender'],
)
plt.show()
# Template of common pyplot customisations (placeholder labels and limits).
# NOTE(review): these calls come after plt.show(), so they presumably act on
# a new, empty figure rather than the scatter plot -- confirm, and move them
# above plt.show() if they are meant to style a real plot.
plt.xlabel(xlabel='variable1')
plt.ylabel(ylabel='variable2')
plt.xlim(1, 2)
plt.ylim(0, 50)
plt.title(label='text string')
plt.legend(loc='lower center')
# Either display the figure with plt.show() here, or write it to disk:
plt.savefig('name.png')
# Draw only the fitted regression line of Annual Salary on Age.
# `scatter` is documented as a boolean, so pass False explicitly instead of
# relying on None being falsy; the underlying points stay hidden.
sns.regplot(
    data=employees,
    x='Age',
    y='Annual Salary',
    scatter=False,
    color='blue',
)
plt.show()
# Regression of salary on age, faceted into one row per Gender and coloured
# by Gender as well.
sns.lmplot(
    data=employees,
    x='Age',
    y='Annual Salary',
    hue='Gender',
    row='Gender',
)
plt.show()

# Pairwise relationship grid built from the employees frame.
sns.pairplot(data=employees)
plt.show()