import pandas as pd
# Installed pandas version (a bare expression: only echoed in a REPL/notebook).
pd.__version__

# NOTE: `fn` is reused as a placeholder path by every reader below; point it
# at a file of the matching format before running an individual snippet.
fn = 'mydata.xlsx'

# Excel workbook -> DataFrame.
df = pd.read_excel(fn)

# Tab-separated text whose records end with a bare carriage return.
pd.read_csv(fn, sep='\t', lineterminator='\r')

# Show every row when a frame is printed; change None to a number to cap it.
pd.set_option('display.max_rows', None)

# Same tab-separated read, keeping only the columns at positions 2, 3 and 7
# and giving them explicit names.
df = pd.read_csv(
    fn,
    sep='\t',
    lineterminator='\r',
    usecols=[2, 3, 7],
    names=['Description', 'ID', 'Group'],
)

# Plain CSV and JSON with default settings.
df = pd.read_csv(fn)
df = pd.read_json(fn)

# Result of a SQL query. `mycn` is not defined in these lines -- presumably an
# open database connection created elsewhere; confirm before running.
df = pd.read_sql('select * from mytable', mycn)

# read_html returns a list with one DataFrame per table found on the page,
# so the first table is picked out by position.
df = pd.read_html("https://docs.microsoft.com/en-us/windows-hardware/design/minimum/supported/windows-11-supported-intel-processors")
df_cpu = df[0]

# Force the 'Job Ticket' column to object dtype instead of letting pandas
# infer one (presumably to keep ticket numbers from being parsed as numbers).
df = pd.read_excel(fn, dtype={'Job Ticket': object})

# Build a two-level index from the 'region' and 'job ticket' columns, then
# sort the rows by that index.
df = pd.read_csv(fn, index_col=['region', 'job ticket'])
df = df.sort_index()
# Peek at the first five and the last four rows.
df.head(5)
df.tail(4)

# Prepend a row, step 1: write it under the temporary label -1.
# The list must supply one value per existing column.
df.loc[-1] = ["This is a new row", "2nd column of new row"]

# Add (or overwrite) a column. `address` is not defined in these lines; it
# must be bound elsewhere before this runs.
df['Address'] = address

# Prepend a row, steps 2 and 3: shift every label up by one so the new row
# becomes label 0, then sort by label to move it to the top.
# sort_index() returns a new frame, so the result has to be kept.
df.index = df.index + 1
df = df.sort_index()

# Remove the row whose label sits at position 7.
df = df.drop(df.index[7])

# Replace the row labels outright; the list length must equal the row count.
index_vals = [1, 2, 3, 4, 5]
df.index = index_vals

# reset_index() returns a new frame with a fresh 0..n-1 index; the old index
# is still kept in the data as a column named "index".
df = df.reset_index()
# Drop that leftover column (again keeping the returned frame).
df = df.drop(columns=["index"])
# --- Columns: a list of labels yields a frame with just those columns ---
df[["name", "address"]]
df[["name", "address"]].head(10)

# --- Rows by label: a single label, then a label slice ---
df.loc[10]
df.loc[1:4]

# Keep a three-column subset under its own name.
tempdf = df[["STATE", "NAME", "CENSUS2010POP"]]

# Row filter and column pick in a single .loc call.
df3 = df_HBCU.loc[
    df_HBCU["State/Territory"] == "Alabama",
    ["School", "City"],
]

# Label-based slicing on both axes: rows from label 9 onward,
# columns "Name" through "Address".
df.loc[9:, "Name":"Address"]

# --- Rows/columns by integer position ---
df.iloc[2:6]
df.iloc[2:6, 3:7]
df.iloc[[1, 5, 7], :]

# --- Boolean masks ---
df[df["Year"] > 1999]
df[df["Year"] == 1999]
# Combine conditions with & (and) / | (or); each one needs its own parentheses.
df[(df["Year"] > 1999) & (df["Country"] == "USA")]
df[(df["Year"] > 1999) | (df["Country"] == "USA")]
# ~ inverts a mask.
df[~(df["Country"] == "USA")]
# How often each distinct value occurs in a column.
df['Country'].value_counts()

# Number of rows per (first name, last name) pair, as a Series indexed by
# the group keys.
df.groupby(["staff first name", "staff last name"]).size()

# Same counts as a flat frame, with the tally in a column named "Count".
df2 = (
    df.groupby(["staff first name", "staff last name"])
    .size()
    .reset_index(name="Count")
)

# Named aggregation: each keyword is an output column, each tuple is
# (source column, aggregation function).
df.groupby(['Type', 'Status']).agg(
    Total_Charges=('Total Charges', 'sum'),
    Job_Count=('Status', 'count'),
).reset_index()

# Distinct values of one column. unique() returns an array, not a frame, so
# it gets its own name rather than replacing `df` (which the lines below
# still need as a DataFrame).
departments = df["Departments"].unique()

# Rows ordered by 'Date'; this returns a new frame and leaves `df` untouched.
df.sort_values(['Date'])

# Reshape long -> wide: one row per processDate, one column per brand,
# cells filled from 'cost'.
df.pivot(index='processDate', columns='brand', values='cost')

# Summary statistics of the frame's columns.
df.describe()

# Rename columns through an {old: new} mapping.
df = df.rename(columns={'col old name': 'col new name'})

# Alternative way to turn group sizes into a frame: wrap the Series in a
# DataFrame, then move the group keys out of the index into columns.
data = df.groupby(["staff first name", "staff last name"]).size()
new_df = pd.DataFrame(data=data)
new_df = new_df.reset_index()
# Then rename any column with df.rename(columns={...}) as shown above.