Python pandas 操作 Excel 整理数据

很久不用 Python 了，想着简化一下日常繁琐的数据整理，于是写了个脚本。

Script1:

1
2
3
4
5
6
7
8
9
10
import pandas as pd

# Load the two workbooks. In both, the column names sit on the sixth row
# (header=5) and the trailing row is discarded (skipfooter=1).
GlobalIndex = pd.read_excel('global.xlsx', header=5, usecols=[1, 4], skipfooter=1)
uData = pd.read_excel('u.xlsx', header=5, usecols=[1, 2, 3, 4], skipfooter=1)

# Keep only the research fields present in both sheets. 'Cites/Paper' exists
# on both sides, so the merge suffixes it: _x comes from global.xlsx (left),
# _y from u.xlsx (right).
MergeResult = GlobalIndex.merge(uData, how='inner', on=['Research Fields'])

# Order by document count, largest first, then number the rows 1..n.
ranked = MergeResult.sort_values(by="Web of Science Documents", ascending=False)
ranked['序号'] = range(1, len(ranked) + 1)

# Source column -> output header, listed in the order the columns are written.
output_columns = {
    '序号': '序号',
    'Research Fields': '学科',
    'Web of Science Documents': '发文数',
    'Cites': '被引数',
    'Cites/Paper_y': '均篇被引数',
    'Cites/Paper_x': '基准线',
}
u = ranked[list(output_columns)].rename(columns=output_columns)
u.to_excel('对比结果.xlsx', index=False)

Script2:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import pandas as pd

def _with_rank(frame, by, rank_column):
    """Return *frame* sorted descending on *by* with a 1-based *rank_column* added."""
    ordered = frame.sort_values(by=by, ascending=False)
    ordered[rank_column] = range(1, len(ordered) + 1)
    return ordered


# Institutions to keep in the final report; matched exactly against the
# 'Institutions' column.
ClinicalMedicineInstitutions = [
    'HARVARD UNIVERSITY',
    'JOHNS HOPKINS UNIVERSITY',
    'SHANGHAI JIAO TONG UNIVERSITY',
    'UNIVERSITY OF CALIFORNIA SAN DIEGO',
    'SUN YAT SEN UNIVERSITY',
    'FUDAN UNIVERSITY',
    'PEKING UNIVERSITY',
    'UNIVERSITY OF TOKYO',
    'CAPITAL MEDICAL UNIVERSITY',
    'CHINESE ACADEMY OF MEDICAL SCIENCES - PEKING UNION MEDICAL COLLEGE',
    'SICHUAN UNIVERSITY',
    'ZHEJIANG UNIVERSITY',
    'HUAZHONG UNIVERSITY OF SCIENCE & TECHNOLOGY',
    'UNIVERSITY OF HONG KONG',
    'CHINESE UNIVERSITY OF HONG KONG',
]

# Column names sit on the sixth row (header=5); the trailing row is dropped.
RawData = pd.read_excel(
    'CLINICAL MEDICINE.xlsx',
    header=5,
    usecols=[1, 3, 4, 5],
    skipfooter=1,
)

# Ranks are computed over the full table, one metric at a time, before any
# filtering. Each step re-sorts the previous result, so earlier rank columns
# are carried along.
SortByDocs = _with_rank(RawData, "Web of Science Documents", 'DOCS_Rank')
SortByCites = _with_rank(SortByDocs, "Cites", 'CITES_Rank')
SortByCitesDivDocs = _with_rank(SortByCites, "Cites/Paper", 'Cites/Papers_Rank')

# Narrow to the selected institutions and present them by document count.
is_selected = SortByCitesDivDocs['Institutions'].isin(ClinicalMedicineInstitutions)
InstitutionsFilter_SortByCitesDivDocs = (
    SortByCitesDivDocs[is_selected]
    .sort_values(by="Web of Science Documents", ascending=False)
)
InstitutionsFilter_SortByCitesDivDocs.to_excel('临床医学统计结果.xlsx', index=False)

Comments

You forgot to set the shortname for Disqus. Please set it in _config.yml.