This document focuses on using Python (pandas) to process Excel and CSV files. Every code snippet is illustrated with a concrete example and explained one by one.
The whole text was typed in and organized by hand, so it may contain errors. Corrections are welcome.
The "Crawler" and "Financial quantitative analysis" documents will be organized and published afterwards.
# --------------------------------------------------------------------------------------------
1, Query data:
print(data) # Query full data values print(data.index) # Search index name print(data['full name']) # Check the value of a specific column print(data[['full name','Gender']]) # Check the value of specific multiple columns print(data.values) # Check the value of each column print(data.sort_index()) # Sort by index column print(data.sort_values('date')) # Sort by a specific column of data print(data['a'][0]) # Check the values of specific columns and rows, i.e. column a and row o print(data.loc[0]['a']) # Check the values of specific rows and columns, i.e. row 0 and column a (by index) print(data.iloc[0][0]) # Check the values of specific rows and columns, i.e. row 0 and column a (by the number of rows and columns) print(data.dtypes) # Query data type print(data.columns) # Check all column names print(data.loc[0:3]) # Check the values of the specified rows print(data.head()) # Check the value of the first 5 lines (default) print(data.head(3)) # Check the values of the first three lines print(data.tail(5)) # Check the value of the last 5 lines (default) print(data.tail(3)) # Check the values of the last three lines print(data.shape) # How many rows and columns are there in the whole table print(data.fillna(1)) # Fill the blanks in the table with the number 1 print(data.replace('Jack','Jason')) # Replace all jacks in the table with Jason print(data.reset_index(drop=True)) # Delete index print(data.apply(np.square)) # Square the data items print(data.describe()) # Check the average / maximum / minimum / standard deviation of each column of data print(data['full name'].value_counts()) # Check the number of occurrences of the same value in the name column
2, Data deletion:
1. Delete single or multiple rows
print(data.drop(2)) # Delete the row whose index label is 2
print(data.drop(labels=[1,3])) # Delete the rows whose index labels are 1 and 3 (not the range 1 to 3)
2. Delete single or multiple columns
print(data.drop('language',axis=1)) # Delete the 'language' column
print(data.drop(labels=['language','English'],axis=1)) # Delete the 'language' and 'English' columns
3. Delete duplicate rows
print(data.drop_duplicates(subset=['full name'],keep='first')) # Delete rows with a duplicated 'full name' and keep the first occurrence
print(data.drop_duplicates(subset=['full name'],keep='last')) # Delete rows with a duplicated 'full name' and keep the last occurrence
print(data.drop_duplicates(subset=['full name'],keep=False)) # Delete every row whose 'full name' is duplicated (keep none of them)
print(data.drop_duplicates(subset=['full name','Gender'],keep=False)) # Delete every row whose ('full name', 'Gender') combination is duplicated (keep none of them)
3, Processing missing / blank values:
1.Show blank values print(data.isnull()) # Judge whether there is a null value in the table. If there is a null value, it is True print(data.notnull()) # Judge whether there is a null value in the table. If it is not null, it is True 2.Delete blank values print(data.dropna()) # Delete the row with blank value (if there is a blank value, the whole row of data will be deleted) print(data.dropna(axis=1)) # Delete the column with blank value (if there is a blank value, the whole column of data will be deleted) print(data.dropna(how='all')) # If the entire row is null, the entire row will be deleted. Otherwise, do not delete print(data.dropna(subset=['language','mathematics'])) # Deletes null values from the specified multiple columns 3.Fill in blank values print(data.fillna(100)) # Fill in the blank value in the whole data as 100 print(data.fillna({'language':2,'mathematics':3})) # Fill in the blank value of the Chinese column as 2 and the blank value of the mathematical column as 3 print(data.fillna(method='ffill')) # Fill the empty value between two non empty values with the non empty value above and fill it down completely print(data.fillna(method='bfill')) # Fill up the empty value between two non empty values according to the non empty value below print(data.fillna(method='ffill',limit=2)) # Fill the null value between two non null values with 2 rows down according to the non null value above
4, Splitting and merging Excel files
import pandas as pd
import os # If the code involves folder operations, you need to import the os module
route='c:/pandas' # If the path does not end with an extension such as .txt/.xlsx, it refers to a folder

1. Merge multiple Excel files in a folder
new_data=pd.DataFrame()
for i in os.listdir(route):
    data=pd.read_excel(os.path.join(route,i),header=1)
    new_data=pd.concat([new_data,data])
new_data.to_excel('c:/pandas/New data.xlsx') # Create the merged Excel file in the 'c:/pandas' folder
# os.listdir(path): read the file names of all files in this folder
# pd.DataFrame(): create an empty table to collect the merged rows
# concat: append data to new_data

2. Merge multiple sheets of one Excel file into one sheet
data=pd.read_excel('route/data.xlsx',sheet_name=None)
sheet_names=list(data.keys())
new_data=pd.DataFrame()
for i in sheet_names:
    new_data_1=data[i]
    new_data=pd.concat([new_data,new_data_1])
new_data.to_excel('route/New data.xlsx')
# sheet_name=None: read all sheets in the Excel file (the result is a dict keyed by sheet name)
# list: get the names of all sheets in the data
# pd.DataFrame(): create an empty table for the concatenation and name it new_data
# for: loop over each sheet name
# concat: append new_data_1 to new_data

3. Split one sheet of an Excel file into multiple sheets (one sheet per distinct value of a specified column)
data=pd.read_excel('route/data.xlsx')
department_details=list(data['department'].drop_duplicates())
new_data=pd.ExcelWriter('c:/pandas/New data.xlsx')
for i in department_details:
    data_1=data[data['department']==i]
    data_1.to_excel(new_data,sheet_name=i)
new_data.close() # The file is only saved once the writer is closed
# drop_duplicates: each distinct value of the 'department' column becomes its own sheet, so the duplicate values of that column are removed first
# pd.ExcelWriter: create the output Excel file in the 'c:/pandas' folder

4. Split one Excel file into multiple Excel files
data=pd.read_excel('route/data.xlsx')
department_details=list(data['department'].drop_duplicates())
for i in department_details:
    data_1=data[data['department']==i]
    data_1.to_excel('{0}.xlsx'.format(i)) # format(i) fills the value of i into {} and generates an Excel file named after i
5, String processing:
1. Character segmentation print(data['full name'].str.cat(sep='*')) # Example: Wang Wu * Zhao Liu * NaN * Li Kui # 1. Concatenate all names in the name column and separate them with "*" 2. cat is concatenation and sep is adding separator print(data['full name'].str.cat(['Transfiguration']*len(data),sep='^')) # Example: Wang Wu's transformation Zhao Liu's transformation NaN ^ transformation Li Kui's transformation # Add two characters after all names in the name column and separate them with "^" print(data['full name'].str.cat(['Transfiguration']*len(data),sep='^'),na_rep='No,')) # Example: Wang Wu's transformation Zhao Liu's transformation No ^ change Li Kui's transformation # na_rep = 'none': replace the null value in the name column with the word "None" print(data['pet name'].str.split()) # Example: Wang Xiaoming Li Xiaoma, Wang Dajun, Zhao Xiaoliu # Separate the small columns print(data['pet name'].str.split('Small')) # Example: [Wang, Ming] [Li, Ma] [Wang Dajun] [Zhao, Liu] # Separate the small columns and separate them with "small", delete the small words and separate them with commas print(data['pet name'].str.split('Small',expend=True)) # Example: [Wang Ming] [Li Ma] [Wang Dajun] [Zhao Liu] # Separate small columns and separate them with "small" words, delete small words and separate them with spaces print('EeEeEe'.partition('e')) # Example: 'e', 'e', 'EE' # Use the first e word encountered from left to right as a separator and keep the e word 2. 
Character acquisition print(data['pet name'].str.get(2)) # Example: Ming Dynasty horse Army six # Gets the third character of the small column print(data['pet name'].str.slice(0,2)) # Example: Wang Xiao Li Xiao Wang Da Zhao Xiao # Get the characters of small name column positions 1 ~ 2 print(data['pet name'].str.slice_replace(1.3,'of')) # Example: Wang Zhi Li Zhi Wang Zhi Zhao Zhi # Get the data of the small column and replace the data of 2 ~ 3 with the word "Zhi" print(data['pet name'].str.join('of')) # Example: Wang Zhixiao Zhiming Li Zhixiao's horse king's big army Zhao Zhixiao's six # Get the data of the small column and link the characters with the word zigzag print(data['date'].astype('str').str.find('-')) # Example: May 1, 2020 >> Location: 4 # Find the specific location of '-' in the date column. If it is not found, "- 1" will be displayed 3. Character verification print(data['pet name'].str.contains('of',na='No,')) # Query whether the small column contains the word "Zhi". If yes, it will display "True", and if not, it will display "None" print(data['pet name'].str.startswith('king')) # Query whether the small column starts with "King". If yes, it will display "True", otherwise it will display "False" print(data['pet name'].str.endswith('king')) # Query whether the small column ends with "King". If yes, it will display "True", otherwise it will display "False" print(data.str.isalpha()) # Judge whether the string is all letters. If yes, it displays True and if no, it displays False print(data.str.isnumeric()) # Judge whether all strings are numbers. If yes, it displays True and if no, it displays False print(data.str.isalnum()) # Judge whether the string is a combination of letters and numbers. If yes, it displays True and if no, it displays False print(data.str.isspace()) # Judge whether the strings are all spaces. 
Whether to display True or False print(data.str.islower()) # Judge whether the strings are all lowercase, whether to display True and whether to display False print(data.str.istitle()) # Judge whether all word initials of the string are capitalized, whether to display True and whether to display False 4. Character filling print(data['pet name'].str.repeat(3)) # Example: Wang Xiaoming Wang Xiaoming Wang Xiaoming Li Xiaoma Li Xiaoma Li Xiaoma Li Xiaoma # Repeat the value of the small column 3 times print(data['pet name'].str.pad(5,fillchar='&')) # Example: &&& Wang Ming && Li Xiaoma && Da Jun Wang && Zhao Xiaoliu # Set the data of the name column to 5 characters and fill the missing value with "&" from the left print(data['pet name'].str.pad(5,fillchar='&',side='right')) # Example: Wang Ming&&& Li Xiaoma & & Wang Dajun & & Zhao Xiaoliu&& # Set the data of the name column to 5 characters and fill the missing value with "&" from the right print(data['pet name'].str.pad(5,fillchar='&'side='both')) # Ex amp le: && Wang Ming& & Li Xiaoma & Wang Dajun& & Zhao Xiaoliu& # Set the data of the name column to 5 characters, and fill in the missing value with "&" on both sides print(data['pet name'].str.zfill(5) # Example: 000 Wang Ming 00 Li Xiaoma Wang Dajun 00 Zhao Xiaoliu # Set the data of the name column to 5 characters and fill the missing value with "0" from the left 5. Character encoding conversion print(data['full name'].str.encode('utf-8')) # Code conversion print(data['full name'].str.decode('utf-8')) # Decoding conversion 6. 
Character replacement print(data.str.lower()) # Convert all characters to lowercase print(data.str.upper()) # Convert all characters to uppercase print(data.str.title()) # Capitalize the first letter of each word print(data.str.capitalize()) # First capital letter print(data.str.swapcase()) # Case swap Dictionaries=str.maketrans({'front''qian','after':'hou'}) print(data['position'],str.translate(Dictionaries)) # Example: qianhou left and right # Specify replacement characters 7. Remove Spaces /Specified character print(data['full name'].str.strip('after')) # Example: front left and right # Delete the word "after" in the name column string print(data['full name'].str.strip()) # Removes spaces from the name column string
6, Discretization and binning:
years=[1992,1983,1922,1932,1973]
bins=[1900,1950,2000]
result=pd.cut(years,bins) # Divide the data into bins using 1900, 1950 and 2000 as the bin edges
print(result) # Example: (1950, 2000], (1950, 2000], (1900, 1950], (1900, 1950], (1950, 2000] # Shows which bin each year falls into
print(pd.value_counts(result)) # Example: (1950, 2000] >> 3 (1900, 1950] >> 2 # Shows the number of years in each bin
result=pd.cut(years,bins,labels=False)
print(pd.value_counts(result)) # Example: 1 >> 3 0 >> 2 # The bins above are replaced by the codes 0 and 1, and the number of years in each bin is counted
bin_names=['50 years ago','50 years later']
result=pd.cut(years,bins,labels=bin_names)
print(pd.value_counts(result)) # Example: 50 years later >> 3 50 years ago >> 2 # The bin names are used as the labels of the intervals
result=pd.qcut(years,q=4) # qcut cuts by quantiles, so each bin holds (roughly) the same number of values
print(pd.value_counts(result)) # Sort the years from small to large, and then divide them into 4 bins of (roughly) equal size
7, Multi level indexing and calculation:
data=pd.read_excel(route,sheet_name='Orderly',index_col=['class','Student number']) # Set the multi-level index while reading
data=data.set_index(['class','Student number']) # Alternative: set the multi-level index after reading (when index_col was not used)
data=data.loc[('1 class',slice(None)),:] # slice(None) selects all rows of class 1
data=data.loc[('1 class','Xiao Ming',slice(None)),:] # Query the relevant data of Xiao Ming in class 1
# sheet_name: select which sheet of the Excel file to read
# index_col: set the class and student number as the index
# -----------------------------------------------------------------
Example:
| tomato | tomato | sweet potato | sweet potato | total | total |
| sales volume | Gross profit | sales volume | Gross profit | sales volume | Gross profit |
| 20 | 2 | 30 | 5 | 50 | 7 |
| 40 | 3 | 60 | 4 | 100 | 7 |
data=pd.read_excel(route,header=[0,1]) # Use the first and second rows as a two-level column header
total=data['tomato']+data['sweet potato'] # Add the sales volume & gross profit of tomato and sweet potato respectively
total.columns=pd.MultiIndex.from_product([['total'],total.columns])
print(total)
result=pd.concat([data,total],axis=1) # Connect tomato and sweet potato horizontally with the total values
8, Data replacement:
data=data.replace('ninth','eighth') # Replace all 'ninth' with 'eighth'
data['numerical value'].replace('ninth','eighth',inplace=True) # Replace all 'ninth' in the 'numerical value' column with 'eighth'
Dictionaries={'A':20,'B':30}
data['numerical value'].replace(Dictionaries,inplace=True) # Replace all 'A' in the 'numerical value' column with 20 and all 'B' with 30
data['numerical value'].replace(['A','B'],30,inplace=True) # Replace all 'A' and 'B' in the 'numerical value' column with 30
data.replace('[A-Z]',88,regex=True,inplace=True) # Use a regular expression to replace every value containing a letter from A to Z with 88; regex=True is required when the pattern is a regular expression
9, Column calculation:
Description >> Method >> Reverse method
Add >> add >> radd
Subtract >> sub >> rsub
Multiply >> mul >> rmul
Divide >> div >> rdiv
Floor division >> floordiv >> rfloordiv
Nth power >> pow >> rpow
data['sales amount']=data['Unit Price']*data['sales volumes']
data['sales amount']=data['Unit Price']*2
def price_increase(x):
    return x+3
data['Unit Price']=data['Unit Price'].apply(price_increase) # Add 3 to each row of data, method 1
data['Unit Price']=data['Unit Price'].apply(lambda x:x+3) # Add 3 to each row of data, method 2
difference=data['End date']-data['Start date'] # Difference between two date columns
data['Interval date']=difference.apply(lambda x:x.days) # Convert the difference (a timedelta) to a whole number of days
data=data['Store 1'].fillna(0)+data['Store 2'].fillna(0) # When the data of store 1 or store 2 contains a null value, fill the null value with 0 before adding
import pandas as pd
import numpy as np
values=[[1,2,3],[4,5,6],[7,8,9]] # Do not name this variable "list": that would shadow the built-in list() used below
data=pd.DataFrame(values,columns=list('xyz'),index=list('abc')) # Method 1
data=pd.DataFrame(values,columns=['x','y','z'],index=['a','b','c']) # Method 2
# Take 'x', 'y' and 'z' as the column names and 'a', 'b' and 'c' as the row names of the table
print(data.apply(np.square)) # Square the entire data
print(data.apply(lambda m:np.square(m) if m.name=='x' else m)) # m is each column in turn. If the column name is 'x', square that column and keep the other columns unchanged
print(data.apply(lambda m:np.square(m) if m.name=='x' else m,axis=1)) # With axis=1, m is each row in turn and m.name is the row name. A row named 'x' would be squared; the other rows stay unchanged (the rows here are 'a', 'b', 'c', so nothing changes)
print(data.apply(lambda m:np.square(m) if m.name in list('yz') else m)) # m is each column in turn. If the column name is 'y' or 'z', square that column; the other columns remain unchanged
print(data.apply(lambda m:np.square(m) if m.name in list('yz') else m,axis=1)) # With axis=1, m is each row in turn. Rows named 'y' or 'z' would be squared; the other rows remain unchanged
Data connection:
1. Concat
Basic data 1 | A | B | C | D |
0 | A0 | B0 | C0 | D0 |
1 | A1 | B1 | C1 | D1 |
2 | A2 | B2 | C2 | D2 |
3 | A3 | B3 | C3 | D3 |
Basic data 2 | A | B | C | D |
4 | A4 | B4 | C4 | D4 |
5 | A5 | B5 | C5 | D5 |
6 | A6 | B6 | C6 | D6 |
7 | A7 | B7 | C7 | D7 |
Basic data 3 | B | D | F |
2 | B2 | D2 | F2 |
3 | B3 | D3 | F3 |
6 | B6 | D6 | F6 |
7 | B7 | D7 | F7 |
data_1 = pd.concat([basic_data_1, basic_data_2]) # Method 1
data_1 = basic_data_1.append(basic_data_2) # Method 2 (DataFrame.append was removed in pandas 2.0; use method 1 there)
Data 1 | A | B | C | D |
0 | A0 | B0 | C0 | D0 |
1 | A1 | B1 | C1 | D1 |
2 | A2 | B2 | C2 | D2 |
3 | A3 | B3 | C3 | D3 |
4 | A4 | B4 | C4 | D4 |
5 | A5 | B5 | C5 | D5 |
6 | A6 | B6 | C6 | D6 |
7 | A7 | B7 | C7 | D7 |
data_2 = pd.concat([basic_data_1, basic_data_3], axis=1) # Concatenate side by side, aligning rows on the index
Data 2 | A | B | C | D | B | D | F |
0 | A0 | B0 | C0 | D0 | NaN | NaN | NaN |
1 | A1 | B1 | C1 | D1 | NaN | NaN | NaN |
2 | A2 | B2 | C2 | D2 | B2 | D2 | F2 |
3 | A3 | B3 | C3 | D3 | B3 | D3 | F3 |
6 | NaN | NaN | NaN | NaN | B6 | D6 | F6 |
7 | NaN | NaN | NaN | NaN | B7 | D7 | F7 |
data_3 = pd.concat([basic_data_1, basic_data_2], keys=['Basic data 1', 'Basic data 2']) # Method 1
data_3 = pd.concat({'Basic data 1': basic_data_1, 'Basic data 2': basic_data_2}) # Method 2
Data 3 | | A | B | C | D |
Basic data 1 | 0 | A0 | B0 | C0 | D0 |
Basic data 1 | 1 | A1 | B1 | C1 | D1 |
Basic data 1 | 2 | A2 | B2 | C2 | D2 |
Basic data 1 | 3 | A3 | B3 | C3 | D3 |
Basic data 2 | 4 | A4 | B4 | C4 | D4 |
Basic data 2 | 5 | A5 | B5 | C5 | D5 |
Basic data 2 | 6 | A6 | B6 | C6 | D6 |
Basic data 2 | 7 | A7 | B7 | C7 | D7 |
data_4 = pd.concat([basic_data_1, basic_data_3], axis=1, join='inner') # Keep only the index values present in both tables
Data 4 | A | B | C | D | B | D | F |
2 | A2 | B2 | C2 | D2 | B2 | D2 | F2 |
3 | A3 | B3 | C3 | D3 | B3 | D3 | F3 |
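data_5 = pd.concat([basic_data_1, basic_data_3], axis=1, join_axes=[basic_data_1.index]) # Keep only the index of basic data 1 (join_axes was removed in pandas 1.0; there use pd.concat([...], axis=1).reindex(basic_data_1.index))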
Data 5 | A | B | C | D | B | D | F |
0 | A0 | B0 | C0 | D0 | NaN | NaN | NaN |
1 | A1 | B1 | C1 | D1 | NaN | NaN | NaN |
2 | A2 | B2 | C2 | D2 | B2 | D2 | F2 |
3 | A3 | B3 | C3 | D3 | B3 | D3 | F3 |
2. Join
Basic data 1 | A | B | key |
0 | A0 | B0 | K0 |
1 | A1 | B1 | K1 |
2 | A2 | B2 | K0 |
3 | A3 | B3 | K1 |
Basic data 2 | C | D |
K0 | C0 | D0 |
K1 | C1 | D1 |
data_3 = basic_data_1.join(basic_data_2, on='key') # Match the 'key' column of basic data 1 against the index of basic data 2
Data 3 | A | B | key | C | D |
0 | A0 | B0 | K0 | C0 | D0 |
1 | A1 | B1 | K1 | C1 | D1 |
2 | A2 | B2 | K0 | C0 | D0 |
3 | A3 | B3 | K1 | C1 | D1 |
3. Merge
Basic data 1 | A | B | key1 | key2 |
0 | A0 | B0 | K0 | K0 |
1 | A1 | B1 | K0 | K1 |
2 | A2 | B2 | K1 | K0 |
3 | A3 | B3 | K2 | K1 |
Basic data 2 | C | D | key1 | key2 |
0 | C0 | D0 | K0 | K0 |
1 | C1 | D1 | K1 | K0 |
2 | C2 | D2 | K1 | K0 |
3 | C3 | D3 | K2 | K0 |
data_1 = pd.merge(basic_data_1, basic_data_2, on=['key1', 'key2']) # Default how='inner': keep only the ('key1', 'key2') combinations present in both tables
Data 1 | A | B | key1 | key2 | C | D |
0 | A0 | B0 | K0 | K0 | C0 | D0 |
1 | A2 | B2 | K1 | K0 | C1 | D1 |
2 | A2 | B2 | K1 | K0 | C2 | D2 |
data_2 = pd.merge(basic_data_1, basic_data_2, on=['key1', 'key2'], how='outer') # outer: full outer join. Keep every ('key1', 'key2') combination that appears in either basic data 1 or basic data 2
Data 2 | A | B | key1 | key2 | C | D |
0 | A0 | B0 | K0 | K0 | C0 | D0 |
1 | A1 | B1 | K0 | K1 | NaN | NaN |
2 | A2 | B2 | K1 | K0 | C1 | D1 |
3 | A2 | B2 | K1 | K0 | C2 | D2 |
4 | A3 | B3 | K2 | K1 | NaN | NaN |
5 | NaN | NaN | K2 | K0 | C3 | D3 |
data_3 = pd.merge(basic_data_1, basic_data_2, on=['key1', 'key2'], how='left') # left: left join. Keep every ('key1', 'key2') combination of basic data 1 (the left table) and attach the matching rows of basic data 2
Data 3 | A | B | key1 | key2 | C | D |
0 | A0 | B0 | K0 | K0 | C0 | D0 |
1 | A1 | B1 | K0 | K1 | NaN | NaN |
2 | A2 | B2 | K1 | K0 | C1 | D1 |
3 | A2 | B2 | K1 | K0 | C2 | D2 |
4 | A3 | B3 | K2 | K1 | NaN | NaN |
data_4 = pd.merge(basic_data_1, basic_data_2, on=['key1', 'key2'], how='right') # right: right join. Keep every ('key1', 'key2') combination of basic data 2 (the right table) and attach the matching rows of basic data 1
Data 4 | A | B | key1 | key2 | C | D |
0 | A0 | B0 | K0 | K0 | C0 | D0 |
1 | A2 | B2 | K1 | K0 | C1 | D1 |
2 | A2 | B2 | K1 | K0 | C2 | D2 |
3 | NaN | NaN | K2 | K0 | C3 | D3 |
data_5 = pd.merge(basic_data_1, basic_data_2) # Without 'on', the columns common to both tables are used as the keys
data_6 = pd.merge(basic_data_1, basic_data_2, on='name', how='right') # Connect basic data 1 & 2 according to the 'name' column of basic data 2 (the right table)
data_7 = pd.merge(basic_data_1, basic_data_2, on='name', how='left') # Connect basic data 1 & 2 according to the 'name' column of basic data 1 (the left table)
data_8 = pd.merge(basic_data_1, basic_data_2, on='name', how='outer') # Connect basic data 1 & 2 according to all 'name' values of basic data 1 & 2
data_9 = pd.merge(basic_data_1, basic_data_2, left_on='name', right_index=True) # Match the 'name' column of basic data 1 against the index of basic data 2
data_10 = pd.merge(basic_data_1, basic_data_2, on='k', suffixes=['_l', '_x']) # suffixes: the suffixes appended to overlapping column names of the left and right tables
# ---------------------------------------------------------------------------------
Code format specification and typesetting:
# A wavy underline below the code indicates that the code does not follow the formatting conventions, but the code can still be executed normally.
# If you are not sure whether the format is standard, copy and paste the runnable code into a blank area, and the editor will automatically correct the code format,
# or press Ctrl+Alt+L to reformat the code directly.
1, Blank line:
1. Two empty lines are reserved at the top and bottom of the import statement
2. Leave two blank lines above the def function declaration
3. Leave two blank lines above the class declaration
4. Leave a blank line above the def method declaration
5. One blank line is reserved between two logical code blocks
2, Space:
# For ease of reading, use ^ instead of space below
1. Leave a space before and after the assignment symbol # Example: a^=^10
2. All binary operators are separated from their operands by spaces # Example: a^+=^c^+^d
3. No spaces immediately inside (), [], {} brackets # Example: total = pd.concat([data, total])
4. There is no space before comma, colon and semicolon, and there is a space after them # Example: print(x,^y)
5. There is no space before the left bracket of parameter list, index and slice # Example: list[index]
3, Indent:
Four spaces are an indent level
4, Line break:
1. Break after comma
2. Disconnect before operation symbol
5, Number type classification:
int Integer type # Example: 1
float Floating point type # Example: 1.0
complex Complex type # Example: 1+2j (1 is the real part and 2 is the imaginary part)
bool Boolean type # Example: only False/True
6, Operator classification:
1. Arithmetic operator:
+ Add # 1+2=3 or 'Hello'+'World'='HelloWorld' Addition between numeric types; concatenation between sequence types such as strings
- Subtract # 2-1=1
* Multiply # 2*3=6 or 'Hello'*2='HelloHello' Multiplication between numeric types; repetition for sequence types such as strings
/ Divide # 2/1=2.0 (the result is always a float)
% Remainder # 3%2=1
// Floor division # 3//2=1 or -3//2=-2 The largest integer not greater than a/b
** Power # 10**2=100
2. Relational operators:
== be equal to # Returns True when a is equal to b, otherwise False
!= Not equal to # Returns True when a is not equal to b, otherwise False
> greater than
< less than
>= Greater than or equal to
<= Less than or equal to
3. Logical operators:
not Not # If a is True, False is returned; if a is False, True is returned
and And # Returns True only when both a and b are True, otherwise False
or Or # Returns True when at least one of a and b is True, otherwise False
4. Assignment operator:
+= a+=b >> a=a+b
-= a-=b >> a=a-b
*= a*=b >> a=a*b
/= a/=b >> a=a/b
%= a%=b >> a=a%b
**= a**=b >> a=a**b
//= a//=b >> a=a//b
&= a&=b >> a=a&b
>>= a>>=b >> a=a>>b
<<= a<<=b >> a=a<<b
5. Bitwise operators:
~ Bit inversion
& Bit and
| Bit or
^ Bit exclusive or
>> Shift right
<< Shift left
7, Operator priority:
() parentheses
f() Function call
[:] Slice
[] subscript
. Reference class member
** power
~ Bit inversion
+,- Sign
*,/,% Multiply, divide, remainder
+,- Add, subtract
<<,>> Shift
& Bit and
^ Bit exclusive or
| Bit or
in,not in,is,is not,<,<=,>,>=,<>,!=,== Comparison (<> exists only in Python 2; use != in Python 3)
not Logical not
and Logical and
or Logical or
lambda Lambda expression
8, Control statement:
1. Branch statement
1.1 if structure:
# If the condition evaluates to True, the indented statement is executed; otherwise it is skipped and execution continues with the code after the if block
if score >= 85:
    print('excellent')
if score < 60:
    print('Come on')
if (score >= 60) and (score <= 85):
    print('effort')
1.2 if-else structure:
# First evaluate the if condition. If it is True, execute statement group 1 and skip the else branch (statement group 2); if it is False, skip statement group 1 and execute statement group 2
if score >= 60:
    print('pass')
    if score >= 90:
        print('excellent')
else:
    print('fail')
1.3 elif structure:
# It is a flattened form of nested if-else structures; only the branch of the first condition that is True is executed
if score >= 90:
    grade='A'
elif score >= 80:
    grade='B'
elif score >= 70:
    grade='C'
else:
    grade='F'
print('Grade='+grade)
2. Loop statement
2.1 while statement:
# There is no fixed limit on the number of iterations. The loop keeps running as long as the condition is True
i=0
while i*i<100:
    i+=1
print('i={0}'.format(i))
print('i*i={0}'.format(i*i))
2.2 for statement:
# Used to loop over a sequence. Sequences include strings, lists and tuples
for item in 'Hello':
    print(item) # Output: H e l l o
for item in range(1,10,2): # 1 is the start value, 10 is the end value (not included) and 2 is the step size
    print('Count is:{0}'.format(item)) # Output: Count is:1 Count is:3 Count is:5 Count is:7 Count is:9
3. Jump statement
3.1 break statement:
# Forcibly exit the loop body without executing the remaining statements or iterations
for item in range(5):
    if item == 3:
        break
    print('Count is:{0}'.format(item)) # Output: Count is:0 Count is:1 Count is:2
3.2 continue statement:
# Skip the rest of the current iteration and continue with the next iteration of the loop
for item in range(5):
    if item == 3:
        continue
    print('Count is:{0}'.format(item)) # Output: Count is:0 Count is:1 Count is:2 Count is:4
# ------------------------------------------------------------------------------------------
Common exceptions:
1. AttributeError exception: Access elements that do not exist in a class (including: member variables, attributes, member methods)
2. OSError/IOError/FileNotFoundError exception: Operating system related exceptions (e.g. "file not found" or "disk full")
3. IndexError exception: When accessing a sequence element, the subscript index exceeds the value range (for example, there are 4 elements in the sequence, and this exception will pop up when you want to access the fifth element)
4. KeyError exception: Access keys that do not exist in the dictionary
5. NameError exception: Use a variable that does not exist
6. TypeError exception: The passed in variable type does not meet the requirements
7. ValueError exception: An invalid parameter value was passed in
# ----------------------------------------------------------------------------------------------
Calculation of date:
# datetime represents a date and time; date represents a date; time represents the time of day; timedelta represents a time difference
import datetime as date_module
def cumulative_month(date, incoming_month):
    year = incoming_month // 12
    month = date.month + incoming_month % 12
    if month != 12:
        year = year + month // 12
        month = month % 12
    return date_module.date(date.year + year, month, date.day)
start_date = date_module.date(2020, 5, 26) # Write 5, not 05: integer literals with a leading zero are a syntax error in Python 3
for i in data.index:
    # The three assignments below are alternatives; use the one that matches the interval you need
    data['date'].at[i] = start_date + date_module.timedelta(days=i) # Add i days. Example: 2020-05-26 2020-05-27 2020-05-28
    data['date'].at[i] = date_module.date(start_date.year + i, start_date.month, start_date.day) # Add i years. Example: 2020-05-26 2021-05-26 2022-05-26
    data['date'].at[i] = cumulative_month(start_date, i) # Add i months. Example: 2020-05-26 2020-06-26 2020-07-26
datetime module (the calls below assume a plain "import datetime"):
print(datetime.datetime.today()) # Output: 2021-10-13 16:02:56.794003
# Returns the current local date and time
print(datetime.datetime.now()) # Output: 2021-10-13 16:02:56.794003
# Returns the current local date and time
print(datetime.datetime.utcnow()) # Output: 2021-10-13 08:02:56.794003
# Returns the current UTC date and time (Beijing time is 8 hours ahead of UTC)
print(datetime.datetime.fromtimestamp(999999999.999)) # Output: 2001-09-09 09:46:39.999000
# Returns the local date and time corresponding to the UNIX timestamp
print(datetime.datetime.utcfromtimestamp(999999999.999)) # Output: 2001-09-09 01:46:39.999000
# Returns the UTC date and time corresponding to the UNIX timestamp
print(datetime.date(2021,10,13)) # Output: 2021-10-13
# Represents date information
print(datetime.time(23,59,58,1999)) # Output: 23:59:58.001999
# Represents the specific time of day
print(datetime.timedelta()) # Output: 0:00:00
# Represents the difference between two dates or times