Python data analysis and collation

This article focuses on using Python (pandas) to process Excel and CSV files. Every snippet is shown with a concrete example and explained line by line.

The full text was typed in and organized by hand, so it may contain errors; corrections are welcome.

Follow-up notes on "web crawlers" and "quantitative financial analysis" will be organized and published later.

#   --------------------------------------------------------------------------------------------

1, Query data:

print(data)                               # Show the whole DataFrame
print(data.index)                         # Show the row index
print(data['full name'])                  # Select one column
print(data[['full name', 'Gender']])      # Select several columns
print(data.values)                        # The underlying values as a NumPy array
print(data.sort_index())                  # Sort by the index
print(data.sort_values('date'))           # Sort by the values of one column
print(data['a'][0])                       # Column 'a', then the row labelled 0
print(data.loc[0, 'a'])                   # Row label 0, column 'a' (label-based, one indexer instead of a chain)
print(data.iloc[0, 0])                    # First row, first column (position-based, one indexer instead of a chain)
print(data.dtypes)                        # Data type of each column
print(data.columns)                       # All column names
print(data.loc[0:3])                      # Rows labelled 0 through 3 (loc slices include the end label)
print(data.head())                        # First 5 rows (the default)
print(data.head(3))                       # First 3 rows
print(data.tail(5))                       # Last 5 rows (same as the default)
print(data.tail(3))                       # Last 3 rows
print(data.shape)                         # (number of rows, number of columns)
print(data.fillna(1))                     # Fill missing values with the number 1
print(data.replace('Jack', 'Jason'))      # Replace every 'Jack' in the table with 'Jason'
print(data.reset_index(drop=True))        # Reset to the default 0..n-1 index and discard the old index
print(data.apply(np.square))              # Square every value (requires `import numpy as np`)
print(data.describe())                    # count / mean / std / min / quartiles / max of each numeric column
print(data['full name'].value_counts())   # How often each value occurs in the 'full name' column

2, Data deletion:

1.Delete single or multiple rows
print(data.drop(2))                                  # Drop the row whose index label is 2
print(data.drop(labels=[1,3]))                       # Drop the rows labelled 1 and 3 (a list of labels, not the range 1 to 3)

2.Delete single or multiple columns
print(data.drop('language',axis=1))                       # Drop the 'language' column
print(data.drop(labels=['language','English'],axis=1))       # Drop the 'language' and 'English' columns

3.Delete duplicate lines
# drop_duplicates expects column NAMES in `subset`, not the column data itself.
print(data.drop_duplicates(subset=['full name'], keep='first'))           # Repeated 'full name': keep the first occurrence
print(data.drop_duplicates(subset=['full name'], keep='last'))            # Repeated 'full name': keep the last occurrence
print(data.drop_duplicates(subset=['full name'], keep=False))             # Repeated 'full name': drop every row that has a duplicate
print(data.drop_duplicates(subset=['full name', 'Gender'], keep=False))   # Duplicates judged on both columns together; drop all of them

3, Processing missing / blank values:

1.Show blank values
print(data.isnull())                      # True where a cell is missing, False otherwise
print(data.notnull())                     # True where a cell is NOT missing, False otherwise

2.Delete blank values
print(data.dropna())                      # Drop every row that contains at least one missing value
print(data.dropna(axis=1))                # Drop every column that contains at least one missing value
print(data.dropna(how='all'))             # Drop a row only when all of its values are missing
print(data.dropna(subset=['language','mathematics']))   # Drop rows with a missing value in 'language' or 'mathematics'

3.Fill in blank values
print(data.fillna(100))                                  # Fill every missing value with 100
print(data.fillna({'language': 2, 'mathematics': 3}))    # Per-column fill: 2 for 'language', 3 for 'mathematics'
# fillna(method=...) is deprecated in recent pandas; ffill()/bfill() are the direct replacements.
print(data.ffill())                                      # Forward fill: a missing value takes the last valid value above it
print(data.bfill())                                      # Backward fill: a missing value takes the next valid value below it
print(data.ffill(limit=2))                               # Forward fill, but at most 2 consecutive missing values per gap

4, Splitting and merging Excel files

import pandas as pd
import os      # If the function involves folder operation, you need to import the os module
 route='c:/pandas'     # If the path has no end. txt/.xlsx, it indicates a folder

1.Multiple files in a folder Excel merge

# Read every workbook in the folder, then concatenate them in one go.
frames = []
for file_name in os.listdir(route):
    # Skip non-Excel entries and the merged output itself, so that a re-run
    # does not fold an earlier result back into the new one.
    if not file_name.endswith('.xlsx') or file_name == 'New data.xlsx':
        continue
    # header=1: the second row of the sheet holds the column names
    frames.append(pd.read_excel(os.path.join(route, file_name), header=1))
new_data = pd.concat(frames)    # raises ValueError when the folder contains no workbook
new_data.to_excel(os.path.join(route, 'New data.xlsx'))
# Create a merged Excel file in the 'c:/pandas' folder
# 'OS. Listdir (path)': read the file names of all files in this folder
# concat: merge data into new data

2.Will one Excel Multiple in sheet Merge into one sheet

# sheet_name=None returns a dict that maps each sheet name to its DataFrame
data = pd.read_excel(os.path.join(route, 'data.xlsx'), sheet_name=None)
sheet_names = list(data.keys())
new_data = pd.DataFrame()
for i in sheet_names:
    new_data_1 = data[i]
    new_data = pd.concat([new_data, new_data_1])
new_data.to_excel(os.path.join(route, 'New data.xlsx'))
# sheet_name=None: read all sheets in Excel
# list: get the names of all sheet s in the data
# pd.DataFrame: create an empty table for connection and name it new data
# for: loop each sheet name
# concat: merge new data 1 into new data

3.Will one Excel One of sheet Split into multiple sheet (Specifies that a column is split into different columns sheet)

data = pd.read_excel(os.path.join(route, 'data.xlsx'))
department_details = list(data['department'].drop_duplicates())
# The workbook is only written to disk when the writer is closed;
# the `with` block closes it automatically.
with pd.ExcelWriter('c:/pandas/New data.xlsx') as new_data:
    for i in department_details:
        data_1 = data[data['department'] == i]
        # Must be inside the loop: one sheet per department
        # (outside the loop only the last department would be written).
        data_1.to_excel(new_data, sheet_name=i)
# drop_duplicates: because the Department details in the Department column will be split into different sheet s, the duplicate values of the Department column will be removed first
# Create a merged Excel file in the 'c:/pandas' folder


4.Will one Excel Split into multiple Excel

data = pd.read_excel(os.path.join(route, 'data.xlsx'))
department_details = list(data['department'].drop_duplicates())
for i in department_details:
    data_1 = data[data['department'] == i]
    # '.format' is a method call on the string (not ',format'), and the write
    # happens inside the loop so that every department gets its own workbook.
    data_1.to_excel('{0}.xlsx'.format(i))
# '{0}.xlsx'.format(i) substitutes i for {0}, so each workbook is named after its department

5, String processing:

1. Character segmentation

print(data['full name'].str.cat(sep='*'))                           
# Example: Wang Wu * Zhao Liu * NaN * Li Kui
# 1. Concatenate all names in the name column and separate them with "*"    2. cat is concatenation and sep is adding separator

print(data['full name'].str.cat(['Transfiguration']*len(data),sep='^'))         
# Example: Wang Wu's transformation   Zhao Liu's transformation    NaN ^ transformation   Li Kui's transformation
# Add two characters after all names in the name column and separate them with "^"

print(data['full name'].str.cat(['Transfiguration'] * len(data), sep='^', na_rep='No,'))
# Example: Wang Wu^Transfiguration   Zhao Liu^Transfiguration   No,^Transfiguration   Li Kui^Transfiguration
# na_rep='No,': a missing name is replaced by the text 'No,' instead of turning the joined result into NaN

print(data['pet name'].str.split())           
# Example: Wang Xiaoming   Li Xiaoma, Wang Dajun, Zhao Xiaoliu
# Separate the small columns

print(data['pet name'].str.split('Small'))       
# Example: [Wang, Ming]   [Li, Ma] [Wang Dajun] [Zhao, Liu]
# Separate the small columns and separate them with "small", delete the small words and separate them with commas

print(data['pet name'].str.split('Small', expand=True))
# Example: [Wang Ming]   [Li Ma] [Wang Dajun] [Zhao Liu]
# expand=True (not "expend") returns a DataFrame with one column per piece instead of a list in each row;
# the separator itself is removed

print('EeEeEe'.partition('e'))
# Example: ('E', 'e', 'EeEe')
# Split once at the first 'e' found from the left; returns (text before, the separator, text after)

2. Character acquisition

print(data['pet name'].str.get(2))              
# Example: Ming Dynasty   horse    Army   six
# Gets the third character of the small column

print(data['pet name'].str.slice(0,2))            
# Example: Wang Xiao   Li Xiao   Wang Da   Zhao Xiao
# Get the characters of small name column positions 1 ~ 2

print(data['pet name'].str.slice_replace(1, 3, 'of'))
# Example: Wang Zhi   Li Zhi   Wang Zhi   Zhao Zhi
# start=1, stop=3 (two separate arguments, not the float 1.3): the 2nd and 3rd characters are replaced with 'of'

print(data['pet name'].str.join('of'))        
# Example:   Wang Zhixiao Zhiming   Li Zhixiao's horse king's big army Zhao Zhixiao's six
# Get the data of the small column and link the characters with the word zigzag

print(data['date'].astype('str').str.find('-'))            
# Example: May 1, 2020  >>  Location: 4
# Find the specific location of '-' in the date column. If it is not found, "- 1" will be displayed

3. Character verification

print(data['pet name'].str.contains('of',na='No,'))
# True where the pet name contains 'of', False where it does not; for missing values the na fill value 'No,' is shown

print(data['pet name'].str.startswith('king'))
# Query whether the small column starts with "King". If yes, it will display "True", otherwise it will display "False"

print(data['pet name'].str.endswith('king'))
# Query whether the small column ends with "King". If yes, it will display "True", otherwise it will display "False"

# The .str accessor exists on a Series (one column), not on a whole DataFrame,
# so a column has to be selected first.
print(data['full name'].str.isalpha())
# True when the string consists of letters only, otherwise False

print(data['full name'].str.isnumeric())
# True when the string consists of numeric characters only, otherwise False

print(data['full name'].str.isalnum())
# True when the string consists of letters and/or digits only, otherwise False

print(data['full name'].str.isspace())
# True when the string consists of whitespace only, otherwise False

print(data['full name'].str.islower())
# True when all cased characters in the string are lowercase, otherwise False

print(data['full name'].str.istitle())
# True when every word starts with an uppercase letter followed by lowercase letters, otherwise False

4. Character filling

print(data['pet name'].str.repeat(3))         
# Example: Wang Xiaoming Wang Xiaoming Wang Xiaoming   Li Xiaoma Li Xiaoma Li Xiaoma Li Xiaoma
# Repeat the value of the small column 3 times

print(data['pet name'].str.pad(5,fillchar='&'))            
# Example:  &&& Wang Ming   && Li Xiaoma  && Da Jun Wang  && Zhao Xiaoliu
# Set the data of the name column to 5 characters and fill the missing value with "&" from the left

print(data['pet name'].str.pad(5,fillchar='&',side='right'))     
# Example: Wang Ming&&&   Li Xiaoma & & Wang Dajun & & Zhao Xiaoliu&&
# Set the data of the name column to 5 characters and fill the missing value with "&" from the right

print(data['pet name'].str.pad(5, fillchar='&', side='both'))
# Example:  && Wang Ming&  & Li Xiaoma & Wang Dajun&  & Zhao Xiaoliu&
# Pad each value to a width of 5 characters, adding "&" on both sides

print(data['pet name'].str.zfill(5))
# Example:   000 Wang Ming    00 Li Xiaoma   Wang Dajun   00 Zhao Xiaoliu
# Pad each value to a width of 5 characters, adding "0" on the left

5. Character encoding conversion

print(data['full name'].str.encode('utf-8'))
# Code conversion

print(data['full name'].str.decode('utf-8'))
# Decoding conversion

6. Character replacement

# The .str accessor exists on a Series (one column), not on a whole DataFrame,
# so a column has to be selected first.
print(data['full name'].str.lower())
# Convert all characters to lowercase

print(data['full name'].str.upper())
# Convert all characters to uppercase

print(data['full name'].str.title())
# Capitalize the first letter of each word

print(data['full name'].str.capitalize())
# Capitalize the first character of the string and lowercase the rest

print(data['full name'].str.swapcase())
# Swap uppercase and lowercase

# str.maketrans()/str.translate() only map SINGLE characters; a key such as 'front'
# raises ValueError. Whole words are therefore replaced with Series.replace and
# regex=True, which performs substring replacement.
dictionaries = {'front': 'qian', 'after': 'hou'}

print(data['position'].replace(dictionaries, regex=True))
# Example:   'front after left right'  ->  'qian hou left right'
# Replace each listed substring with its mapped value

7. Remove Spaces /Specified character

print(data['full name'].str.strip('after'))
# Example: front left and right
# strip('after') treats its argument as a SET of characters: any leading or trailing 'a', 'f', 't', 'e', 'r'
# is removed from both ends of each value. It does not search for the substring "after".

print(data['full name'].str.strip())
# Removes spaces from the name column string


6, Discretization and crating:

particular_year = [1992, 1983, 1922, 1932, 1973]
case = [1900, 1950, 2000]

result = pd.cut(particular_year, case)               # Bin edges 1900, 1950, 2000 -> bins (1900, 1950] and (1950, 2000]

print(result)
# Example: (1950, 2000], (1950, 2000], (1900, 1950], (1900, 1950], (1950, 2000]
# Shows which bin each year falls into

# pd.value_counts() is deprecated; wrap the result in a Series and count there.
print(pd.Series(result).value_counts())
# Example: (1950, 2000] >> 3       (1900, 1950] >> 2
# Number of years in each bin

result = pd.cut(particular_year, case, labels=False)

print(pd.Series(result).value_counts())
# Example: 1 >> 3    0 >> 2
# labels=False replaces the intervals with the integer bin codes 0 and 1

box_name = ['50 last year', '50 Years later']
result = pd.cut(particular_year, case, labels=box_name)
print(pd.Series(result).value_counts())
# Example:   50 Years later >> 3    50 last year >> 2
# box_name supplies the labels shown for the bins

result = pd.qcut(particular_year, q=4)              # qcut bins by quantiles: 4 bins holding (nearly) equal numbers of values
print(pd.Series(result).value_counts())
# The years are ranked from small to large and divided into 4 quantile-based bins


7, Multi level indexing and calculation:

data = pd.read_excel(route, sheet_name='Orderly')
data = data.set_index(['class', 'Student number'])     # a LIST of columns builds a two-level row index
data = data.loc[('1 class', slice(None)), :]           # slice(None) keeps every second-level entry of class '1 class'
data = data.loc[('1 class', 'Xiao Ming'), :]           # one key per index level: the entry 'Xiao Ming' in class '1 class'
# sheet_name: selects which sheet of the workbook is read (it does not rename anything)
# set_index([...]): uses the class and student number columns together as the index
# -----------------------------------------------------------------

Example:
tomato           sweet potato          total
 sales volume  Gross profit     sales volume  Gross profit     sales volume    Gross profit
20     2       30    5       50      7
40     3       60    4       100     7

data = pd.read_excel(route, header=[0, 1])               # The first two rows form a two-level column header
total = data['tomato'] + data['sweet potato']            # Add sales volume and gross profit of both products, column by column
total.columns = pd.MultiIndex.from_product([['total'], total.columns])
print(total)
result = pd.concat([data, total], axis=1)                # Put the total columns side by side with the original data


8, Data replacement:

data=data.replace('ninth','eighth')         
 # Replace all ninth with eighth

data['numerical value'].replace('ninth','eighth',inplace=True)           
# Replace all ninth in the numeric column with eighth

Dictionaries={'A':20,'B':30}
data['numerical value'].replace(Dictionaries,inplace=True)           
# Replace all A's in the numeric column with 20 and B's with 30

data['numerical value'].replace(['A','B'],30,inplace=True)         
# Replace all A and B in the numeric column with 30

data.replace('[A-Z]', 88, regex=True, inplace=True)
# The pattern must be a string. With regex=True it is treated as a regular expression,
# so string values that match [A-Z] are replaced by 88

9, Column calculation:

Operation        >>   Method      >>   Reversed method
Addition         >>   add         >>   radd
Subtraction      >>   sub         >>   rsub
Multiplication   >>   mul         >>   rmul
Division         >>   div         >>   rdiv
Floor division   >>   floordiv    >>   rfloordiv
Power            >>   pow         >>   rpow
data['sales amount']=data['Unit Price']*data['sales volumes']
data['sales amount']=data['Unit Price']*2
def price_increase(x):
    """Return the unit price *x* raised by 3."""
    return x + 3


# Two equivalent alternatives; use only one of them, otherwise 3 is added twice.
data['Unit Price'] = data['Unit Price'].apply(price_increase)        # Add 3 to every unit price, method 1
data['Unit Price'] = data['Unit Price'].apply(lambda x: x + 3)       # Add 3 to every unit price, method 2

difference = data['End date'] - data['Start date']                # Difference between the two date columns
data['Interval date'] = difference.apply(lambda x: x.days)        # .days attribute: whole days of each difference

data=data['Store 1'].fillna(0)+data['Store 2'].fillna(0)         
#When the data of store 1 or store 2 contains a null value, fill the null value as 0
import pandas as pd
import numpy as np

# Not named `list`: that would shadow the builtin list() that is called below.
rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

data = pd.DataFrame(rows, columns=list('xyz'), index=list('abc'))            # Method 1
data = pd.DataFrame(rows, columns=['x', 'y', 'z'], index=['a', 'b', 'c'])    # Method 2
# 'x', 'y', 'z' become the column names and 'a', 'b', 'c' the row labels

print(data.apply(np.square))
# Square the entire data

print(data.apply(lambda m: np.square(m) if m.name == 'x' else m))
# m is one column at a time and m.name is its column name: only column 'x' is squared

print(data.apply(lambda m: np.square(m) if m.name == 'a' else m, axis=1))
# axis=1: m is one row at a time and m.name is the ROW label ('a', 'b', 'c'), so the test
# must use a row label: only row 'a' is squared

print(data.apply(lambda m: np.square(m) if m.name in list('yz') else m))
# Columns 'y' and 'z' are squared, column 'x' is unchanged

print(data.apply(lambda m: np.square(m) if m.name in list('bc') else m, axis=1))
# axis=1: rows 'b' and 'c' are squared, row 'a' is unchanged


Data connection:


1. Concat

Basic data 1

A

B

C

D

0

A0

B0

C0

D0

1

A1

B1

C1

D1

2

A2

B2

C2

D2

3

A3

B3

C3

D3

Basic data 2

A

B

C

D

4

A4

B4

C4

D4

5

A5

B5

C5

D5

6

A6

B6

C6

D6

7

A7

B7

C7

D7

Basic data 3

B

D

F

2

B2

D2

F2

3

B3

D3

F3

6

B6

D6

F6

7

B7

D7

F7

data_1 = pd.concat([basic_data_1, basic_data_2])    # Method 1: stack the rows of both tables

# Method 2 works on pandas < 2.0 only; DataFrame.append was removed in pandas 2.0:
# data_1 = basic_data_1.append(basic_data_2)

Data 1

A

B

C

D

0

A0

B0

C0

D0

1

A1

B1

C1

D1

2

A2

B2

C2

D2

3

A3

B3

C3

D3

4

A4

B4

C4

D4

5

A5

B5

C5

D5

6

A6

B6

C6

D6

7

A7

B7

C7

D7

data_2 = pd.concat([basic_data_1, basic_data_3], axis=1)     # axis=1: place the tables side by side, aligned on the row index; rows missing from one table get NaN

Data 2

A

B

C

D

B

D

F

0

A0

B0

C0

D0

NaN

NaN

NaN

1

A1

B1

C1

D1

NaN

NaN

NaN

2

A2

B2

C2

D2

B2

D2

F2

3

A3

B3

C3

D3

B3

D3

F3

6

NaN

NaN

NaN

NaN

B6

D6

F6

7

NaN

NaN

NaN

NaN

B7

D7

F7

data_3 = pd.concat([basic_data_1, basic_data_2], keys=['Basic data 1', 'Basic data 2'])    # Method 1: keys add an outer index level naming the source table

data_3 = pd.concat({'Basic data 1': basic_data_1, 'Basic data 2': basic_data_2})    # Method 2: the dict keys are used as that outer level

Data 3

A

B

C

D

Basic data 1

0

A0

B0

C0

D0

Basic data 1

1

A1

B1

C1

D1

Basic data 1

2

A2

B2

C2

D2

Basic data 1

3

A3

B3

C3

D3

Basic data 2

4

A4

B4

C4

D4

Basic data 2

5

A5

B5

C5

D5

Basic data 2

6

A6

B6

C6

D6

Basic data 2

7

A7

B7

C7

D7

data_4 = pd.concat([basic_data_1, basic_data_3], axis=1, join='inner')    # join='inner' (no trailing space): keep only the row labels present in both tables

Data 4

A

B

C

D

B

D

F

2

A2

B2

C2

D2

B2

D2

F2

3

A3

B3

C3

D3

B3

D3

F3

# The join_axes argument was removed from pd.concat in pandas 1.0; reindexing the result
# on the first table's index keeps exactly the rows of basic data 1.
data_5 = pd.concat([basic_data_1, basic_data_3], axis=1).reindex(basic_data_1.index)

Data 5

A

B

C

D

B

D

F

0

A0

B0

C0

D0

NaN

NaN

NaN

1

A1

B1

C1

D1

NaN

NaN

NaN

2

A2

B2

C2

D2

B2

D2

F2

3

A3

B3

C3

D3

B3

D3

F3


2. Join

Basic data 1

A

B

key

0

A0

B0

K0

1

A1

B1

K1

2

A2

B2

K0

3

A3

B3

K1

Basic data 2

C

D

K0

C0

D0

K1

C1

D1

data_3 = basic_data_1.join(basic_data_2, on='key')    # Match the 'key' column of basic data 1 against the index of basic data 2

Data 1

A

B

key

C

D

0

A0

B0

K0

C0

D0

1

A1

B1

K1

C1

D1

2

A2

B2

K0

C0

D0

3

A3

B3

K1

C1

D1

3. Merge

Basic data 1

A

B

key1

key2

0

A0

B0

K0

K0

1

A1

B1

K0

K1

2

A2

B2

K1

K0

3

A3

B3

K2

K1

Basic data 2

C

D

key1

key2

0

C0

D0

K0

K0

1

C1

D1

K1

K0

2

C2

D2

K1

K0

3

C3

D3

K2

K0

data_1 = pd.merge(basic_data_1, basic_data_2, on=['key1', 'key2'])    # Default how='inner': keep only the (key1, key2) pairs present in both tables

Data 1

A

B

key1

key2

C

D

0

A0

B0

K0

K0

C0

D0

1

A2

B2

K1

K0

C1

D1

2

A2

B2

K1

K0

C2

D2

data_2 = pd.merge(basic_data_1, basic_data_2, on=['key1', 'key2'], how='outer')   # outer: full outer join. Keep every (key1, key2) pair found in either table; missing values become NaN

Data 2

A

B

key1

key2

C

D

0

A0

B0

K0

K0

C0

D0

1

A1

B1

K0

K1

NaN

NaN

2

A2

B2

K1

K0

C1

D1

3

A2

B2

K1

K0

C2

D2

4

A3

B3

K2

K1

NaN

NaN

5

NaN

NaN

K2

K0

C3

D3

data_3 = pd.merge(basic_data_1, basic_data_2, on=['key1', 'key2'], how='left')   # left: left join. Keep every (key1, key2) pair of basic data 1 (the left table) and attach matching rows of basic data 2

Data 3

A

B

key1

key2

C

D

0

A0

B0

K0

K0

C0

D0

1

A1

B1

K0

K1

NaN

NaN

2

A2

B2

K1

K0

C1

D1

3

A2

B2

K1

K0

C2

D2

4

A3

B3

K2

K1

NaN

NaN

data_4 = pd.merge(basic_data_1, basic_data_2, on=['key1', 'key2'], how='right')  # right: right join. Keep every (key1, key2) pair of basic data 2 (the right table) and attach matching rows of basic data 1

Data 4

A

B

key1

key2

C

D

0

A0

B0

K0

K0

C0

D0

1

A2

B2

K1

K0

C1

D1

2

A2

B2

K1

K0

C2

D2

3

NaN

NaN

K2

K0

C3

D3

data_5 = pd.merge(basic_data_1, basic_data_2)                                          # No `on`: join on all columns the two tables have in common
data_6 = pd.merge(master_data_1, master_data_2, on='name', how='right')                # Right join: keep every 'name' of master data 2
data_7 = pd.merge(master_data_1, master_data_2, on='name', how='left')                 # Left join: keep every 'name' of master data 1
data_8 = pd.merge(master_data_1, master_data_2, on='name', how='outer')                # Outer join: keep every 'name' found in either table
data_9 = pd.merge(master_data_1, master_data_2, left_on='name', right_index=True)      # Match the 'name' column of master data 1 against the index of master data 2
data_10 = pd.merge(basic_data_1, basic_data_2, on='k', suffixes=['_l', '_x'])          # suffixes: appended to overlapping non-key column names of the left / right table

# ---------------------------------------------------------------------------------


Code format specification and typesetting:

#   The wavy line below the code indicates that the code input is not standardized, but the code can still be executed normally.

#   If the format is not clear whether it is standardized, you can copy and paste the runnable code to the blank, and the system will automatically correct the code format.

Or Ctrl+Alt+L to format the code directly


1, Blank line:
1. Two empty lines are reserved at the top and bottom of the import statement
2. Leave two blank lines above the def function declaration
3. Leave two blank lines above the class declaration
4. Leave a blank line above the def method declaration
5. One blank line is reserved between two logical code blocks

2, Space:

#   For ease of reading, use ^ instead of space below
1. Put one space before and after the assignment operator                       # Example:   a^=^10
2. Put one space on each side of a binary operator                              # Example:   a^+=^c^+^d
3. No spaces just inside (), [] or {}                                            # Example:   total^=^pd.concat([data,^total])
4. No space before a comma, colon or semicolon; one space after it              # Example:   print(x,^y)
5. No space before the opening bracket of an argument list, index or slice      # Example:   list[index]

3, Indent:
Four spaces are an indent level

4, Line break:
1. Break after comma
2. Disconnect before operation symbol

5, Number type classification:
int      Integer type                 # Example:   1
float    Floating point type          # Example:   1.0
complex  Complex type                 # Example:   1+2j   (1 is the real part and 2 is the imaginary part)
bool     Boolean type                 # Example:   only False/True

6, Operator classification:
1. Arithmetic operator:
+   Add               # 1+2=3   or 'Hello'+'World'='HelloWorld'     Adds numbers; concatenates sequences such as strings
-   Subtract          # 2-1=1
*   Multiply          # 2*3=6   or 'Hello'*2='HelloHello'           Multiplies numbers; repeats sequences such as strings
/   Divide            # 2/1=2.0     True division always returns a float
%   Modulo            # 3%2=1       Remainder of the division
//  Floor division    # 3//2=1   or -3//2=-2     The largest integer that is not greater than a/b
**  Power             # 10**2=100

2. Relational operators:
==    be equal to          # Returns True when a is equal to b, otherwise False
!=    Not equal to         # Returns True when a is not equal to b, otherwise False
>     greater than
<     less than
>=    Greater than or equal to
<=    Less than or equal to

3. Logical operators:
not    Not         # If a is True, False is returned; if a is False, True is returned
and    And         # Returns True when both a and b are True, otherwise False
or     Or          # Returns True when at least one of a and b is True, otherwise False

4. Assignment operator:
+=      a+=b    >>   a=a+b
-=      a-=b    >>   a=a-b
*=      a*=b    >>   a=a*b
/=      a/=b    >>   a=a/b
%=      a%=b    >>   a=a%b
**=     a**=b   >>   a=a**b
//=     a//=b   >>   a=a//b
&=      a&=b    >>   a=a&b
>>=     a>>=b   >>   a=a>>b
<<=     a<<=b   >>   a=a<<b

5. Bitwise operators:
~      Bit inversion
&      Bit and
|      Bit or
^      Bit exclusive or
>>     Shift right
<<     Shift left

7, Operator priority:
()             Parentheses
f()            Function call
[:]            Slicing
[]             Subscript (indexing)
.              Attribute reference (class member)
**             Power
~              Bitwise inversion
+,-            Sign (unary plus / minus)
*,/,%          Multiply, divide, remainder
+,-            Add, subtract
<<,>>          Bit shifts
&              Bitwise and
^              Bitwise exclusive or
|              Bitwise or
in,not in,is,is not,<,<=,>,>=,!=,==           Comparisons (the <> operator exists only in Python 2)
not            Logical not
and            Logical and
or             Logical or
lambda         Lambda expression

8, Control statement:
1. Branch statement
1.1 if structure:
#If the condition evaluates to True, the indented statement is executed; otherwise it is skipped and execution continues after the if block
    if score >= 85:
         print('excellent ')
    if score < 60:
         print('Come on ')
    if (score >= 60)and(score <= 85):
         print('effort ')
1.2 if-else structure:
#The if condition is tested first. If it is True, statement group 1 is executed and the else branch is skipped; if it is False, statement group 1 is skipped and the else branch (statement group 2) is executed
    if score >= 60:
         print('pass')
        if score >= 90:
             print('excellent ')
    else:
     print('fail ')
1.3 elif structure:
#A flattened form of nested if-else: the conditions are tested in order and only the branch of the first True condition is executed (the else branch runs when none is True)
    if score >= 90:
        grade='A'
    elif score >= 80:
        grade='B'
    elif score >= 70:
        grade='C'
    else:
        grade='F'
    print('Grade='+grade)

2. Circular statement
2.1 while statement:
#There is no limit to the number of cycles. As long as the conditions are met, it will cycle
    i=0
    while i*i<100
        i+=1
    print('i={0}'.format(i))
    print('i*i={0}'.format(i*i))
2.2 for statement:
#Used for sequence loop. The sequence includes string, list and tuple
for item in 'Hello':
    print(item)           # Output: H  e  l  l  o  (one character per line)
for item in range(1, 10, 2):              # start value 1, stop before 10, step size 2
    print('Count is:{0}'.format(item))    # Output: Count is:1    Count is:3    Count is:5    Count is:7    Count is:9

3. Jump statement
3.1 break statement:
#Forcibly exit the loop body without executing the remaining statements
    for item in range(5)
        if item == 3
            break
         print('Count is:{0}'.format(item))     # Output: count is: 0    Count is: 1    Count is: 2
3.2 continue statement:
#Skip the rest of the current iteration and continue with the next iteration of the loop
    for item in range (5)
        if item == 3
            continue
         print('Count is:{0}'.format(item))      # Output: count is: 0    Count is: 1    Count is: 2    Count is: 4

# ------------------------------------------------------------------------------------------

Common exceptions:
1. AttributeError exception:           Access elements that do not exist in a class (including: member variables, attributes, member methods)
2. OSError/IOError/FileNotFoundError exception:      Operating system related exceptions (e.g. "file not found" or "disk full")
3. IndexError exception:               When accessing a sequence element, the subscript index exceeds the value range (for example, there are 4 elements in the sequence, and this exception will pop up when you want to access the fifth element)
4. KeyError exception:                 Access keys that do not exist in the dictionary
5. NameError exception:                Use a variable that does not exist
6. TypeError exception:                The passed in variable type does not meet the requirements
7. ValueError exception:               An invalid parameter value was passed in


# ----------------------------------------------------------------------------------------------

Calculation of date:
#datetime represents time and date  ;   Date means date  ;  Time is the time of the day  ; timedelta represents the time difference
import datetime as date_module


def cumulative_month(date, incoming_month):
    """Return *date* moved forward by *incoming_month* calendar months.

    The day of the month is kept unchanged, so ``datetime.date`` raises
    ValueError when the target month has no such day (for example
    31 January plus one month).
    """
    # Work with a zero-based month so that divmod yields the year carry directly
    year, month = divmod(date.month - 1 + incoming_month, 12)
    return date_module.date(date.year + year, month + 1, date.day)


start_date = date_module.date(2020, 5, 26)    # 5, not 05: integer literals must not have a leading zero
for i in data.index:    # assumes the index holds the integers 0, 1, 2, ... (they are used as offsets)
    # Three alternatives; keep only the one you need, because each assignment overwrites the previous one.
    data.at[i, 'date'] = start_date + date_module.timedelta(days=i)     # +i days.    Example: 2020-05-26    2020-05-27    2020-05-28
    data.at[i, 'date'] = date_module.date(start_date.year + i, start_date.month, start_date.day)      # +i years.   Example: 2020-05-26    2021-05-26    2022-05-26
    data.at[i, 'date'] = cumulative_month(start_date, i)               # +i months.  Example: 2020-05-26    2020-06-26    2020-07-26

Datetime module:
import datetime

# Current local date and time
print(datetime.datetime.today())      # Output: 2021-10-13 16:02:56.794003

# Current local date and time (now() additionally accepts a tz argument)
print(datetime.datetime.now())        # Output: 2021-10-13 16:02:56.794003

# Current UTC date and time (Beijing time is 8 hours ahead of UTC).
# utcnow() is deprecated since Python 3.12; now(timezone.utc) is the timezone-aware replacement.
print(datetime.datetime.now(datetime.timezone.utc))    # Output: 2021-10-13 08:02:56.794003+00:00

# Local date and time for a UNIX timestamp (the example output is for a machine in the UTC+8 zone)
print(datetime.datetime.fromtimestamp(999999999.999))         # Output: 2001-09-09 09:46:39.999000

# UTC date and time for a UNIX timestamp
# (replaces utcfromtimestamp(), which is deprecated since Python 3.12)
print(datetime.datetime.fromtimestamp(999999999.999, tz=datetime.timezone.utc))      # Output: 2001-09-09 01:46:39.999000+00:00

# A calendar date
print(datetime.date(2021, 10, 13))          # Output: 2021-10-13

# A time of day: hour, minute, second, microsecond
print(datetime.time(23, 59, 58, 1999))        # Output: 23:59:58.001999

# A duration, i.e. the difference between two dates or times; with no arguments it is zero
print(datetime.timedelta())               # Output: 0:00:00

Tags: Python Data Analysis

Posted on Wed, 27 Oct 2021 12:12:00 -0400 by N350CA