day1
new file
Adjust the font: Configure IDLE
run module
Look at python built-in functions
Variable variable
Chinese variable names are supported
String string
Single quotes, double quotes, and the escape character \
Alt + P returns to the previous statement
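A \ placed at the end of a line indicates that the statement is not finished and continues on the next line
\n inserts a line break inside a string; a trailing \ continues the string literal on the next source line
"""...""" Triple double quotes also let a string span multiple lines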
array
# String indexing and slicing: s[start:stop:step]; the stop index is excluded.
s = '12345'
print(s[-1])     # 5      (a negative index counts from the end)
print(s[1])      # 2      (indices start at 0)
print(s[0:4:2])  # 13
print(s[0:4:1])  # 1234
print(s[1:4:1])  # 234
print(s[0:5:2])  # 135
print(s[0:])     # 12345  (omitted stop: up to the end)
print(s[:3])     # 123    (omitted start: from the beginning)
pycharm
# str.format(): each {} is replaced by the corresponding positional argument.
u1 = 'Cat'
u2 = 'Tom'
format_message = '{} tells {} :"hello"'.format(u1, u2)
print(format_message)

# f-string: variables are interpolated directly inside the braces.
fstring_message = f'{u1} tells {u2} :"hello"'
print(fstring_message)
# List [] -- an ordered, mutable sequence.
# Named `items` so that the built-in `list` type is not shadowed.
items = [1, 2, 3.3]
print(items[1])               # 2
print(items[-1])              # 3.3
items.append(666)             # add to the end
items.append('py')
items.insert(1, 'learn')      # insert at the given position (index 1)
items.extend('who')           # a string is iterated: 'w', 'h', 'o' are added separately
items.extend([22, 33, 44])
print(items)
items.pop()                   # delete the last element (44)
items.pop(3)                  # delete the element at index 3, i.e. the fourth one (3.3)
items.remove(2)               # delete the first element equal to 2 (by value, not by index)
print(items)
items[2] = 888                # modify 666 to 888
print(items)
# Tuple () -- like a list, but it cannot be modified.
print(items[-1])
print(items[1:4])             # indices 1..3, i.e. the 2nd to the 4th element
print(items)

# Dictionary {} -- key/value pairs, key --> value.
user = {
    'name': 'Tom',
    'age': 18,
    'gender': 'male',
}
print(user)
print(user['age'])
user['age'] = 28
user['habbit'] = 'Play basketball'  # assigning to a new key adds it
print(user)
output
# Summation: add up the integers 1..100 with a while loop.
n = 1
s = 0
while n <= 100:
    s += n
    n += 1
print(s)  # 5050


# The same computation wrapped in a function.
def sum(n, m):
    """Return n + (n + 1) + ... for every value that does not exceed m.

    Args:
        n: first value to add.
        m: inclusive upper bound.

    Returns:
        The accumulated total; 0 when n is greater than m, because the
        loop body never runs.

    Note:
        This function shadows the built-in ``sum``. The name is kept so
        existing calls such as ``sum(1, 100)`` keep working.
    """
    total = 0
    current = n
    while current <= m:
        total += current
        current += 1
    return total


print(sum(1, 100))  # 5050
# NOTE(review): the original note here read "n != m"; its intent is unclear --
# the loop also handles n == m (it returns n).
# Read: load the whole file into a string and print it.
# `with` closes the file even if reading raises.
# NOTE(review): no encoding is passed, so the platform default is used --
# confirm it matches how loveyou.txt was saved.
with open('loveyou.txt') as f:
    s = f.read()
print(s)

# Write: mode='w' creates the file or overwrites an existing one.
with open('loveme.txt', mode='w', encoding='utf-8') as f:
    f.write('I scarce you')
# jieba.lcut() splits a Chinese text into a list of Chinese words.
import jieba

s = jieba.lcut('Ability to split a Chinese text into sequences of Chinese words')
print(s)
Day 2
python Library Download
Crawling bullet comments (danmaku)
Crawl the bullet comments of the first episode of BLACKPINK House on Bilibili
# Download the bullet comments (danmaku) of a Bilibili video and save them,
# one per line, to dan_mu.txt.
import requests
import json
import chardet
import re
from pprint import pprint

# Seconds to wait for the server; without a timeout requests.get can block
# forever on a stalled connection.
_REQUEST_TIMEOUT = 10

# Captures the text between a <d ...> opening tag and its </d> closing tag.
_DANMAKU_PATTERN = re.compile('<d.*?>(.*?)</d>')


# Get the cid from a bvid request.
def get_cid(bvid="BV1yW411B7ai"):
    """Return the cid of the first page listed for the video *bvid*.

    Args:
        bvid: video id placed in the pagelist query string; defaults to
            the video this script was written for.

    Returns:
        The value stored under ``data[0]["cid"]`` in the JSON reply.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    url = 'https://api.bilibili.com/x/player/pagelist?bvid=' + bvid + '&jsonp=jsonp'
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    json_dict = json.loads(response.text)
    # pprint(json_dict)  # uncomment to inspect the full reply
    return json_dict["data"][0]["cid"]


# Request the comment list by cid and parse it to get the final data.
def get_data(cid):
    """Return the list of comment strings found in the reply for *cid*.

    Args:
        cid: id appended to the list.so URL as the ``oid`` parameter.

    Returns:
        A list with the captured text of every ``<d ...>...</d>`` match.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    final_url = "https://api.bilibili.com/x/v1/dm/list.so?oid=" + str(cid)
    response = requests.get(final_url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    # chardet guesses the encoding from the raw bytes; it reports None when
    # it cannot decide (e.g. an empty body), so fall back to UTF-8.
    response.encoding = chardet.detect(response.content)['encoding'] or 'utf-8'
    # pprint(response.text)  # uncomment to inspect the raw reply
    return _DANMAKU_PATTERN.findall(response.text)


# Save the list of bullet comments.
def save_to_file(data):
    """Write every item of *data* on its own line to dan_mu.txt (UTF-8)."""
    with open("dan_mu.txt", mode="w", encoding="utf-8") as f:
        f.writelines(item + "\n" for item in data)


cid = get_cid()
data = get_data(cid)
save_to_file(data)
Word Cloud Map
Note: the Python file must not be named wordcloud.py!!! Otherwise the script shadows the installed package and
`from wordcloud import WordCloud` will raise an error
import pandas as pd
import jieba
from wordcloud import WordCloud
from imageio import imread
import matplotlib.pyplot as plt

# Read the bullet-comment file and tokenize it with jieba.lcut().
with open("dan_mu.txt", encoding="utf-8") as f:
    txt = f.read()
cut_list = jieba.lcut(txt)
new_str = ' '.join(cut_list)  # join the words into one space-separated string

# Image passed to WordCloud as the mask.
back_picture = imread(r"D:\jenlisa.jpg")
wc = WordCloud(
    font_path='msyh.ttc',
    background_color="white",
    max_words=6000,
    mask=back_picture,
    max_font_size=200,
    random_state=40,
)
wc.generate_from_text(new_str)  # alternative: wc.generate(new_str)

# Show the result without axes, then save it as an image file.
plt.imshow(wc)
plt.axis("off")
plt.show()
wc.to_file("bp.png")
# Optional extras for the word cloud.
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

# To set up stop words, pass this keyword argument to WordCloud(...):
#     stopwords=STOPWORDS,

# Colour generator built from the mask image.
# NOTE(review): assumes back_picture is the image array loaded with imread.
img_colors = ImageColorGenerator(back_picture)
# Minimal version: tokenize, join with spaces, generate and save.
# NOTE(review): assumes `s` holds the text to tokenize and that jieba and
# WordCloud are already imported.
cut_list = jieba.lcut(s)
new_str = ' '.join(cut_list)
wc = WordCloud(font_path='msyh.ttc').generate(new_str)
wc.to_file('blackpink.png')
generate(text): generate a word cloud from text.
generate_from_text(text): generate a word cloud from text.
generate_from_frequencies(frequencies): create a word cloud from words and frequencies.
fit_words(frequencies): create a word cloud from words and frequencies.
class wordcloud.WordCloud(font_path=None, width=400, height=200, margin=2, ranks_only=None, prefer_horizontal=0.9,mask=None, scale=1, color_func=None, max_words=200, min_font_size=4, stopwords=None, random_state=None,background_color='black', max_font_size=None, font_step=1, mode='RGB', relative_scaling=0.5, regexp=None, collocations=True,colormap=None, normalize_plurals=True)
- font_path: string, e.g. msyh.ttc (Microsoft YaHei); use a font that ships with the system or download a .ttf file yourself
- prefer_horizontal: float, default 0.9; how often words are laid out horizontally rather than vertically
- mask: default None; if not None, the word cloud is drawn using a 2-D mask, and the white part of the mask image is not drawn on
- Scale:floatDefault 1 Scale-up Canvas
- font_step default 1 font step larger than 1 will speed up the operation
- stopwords: words to exclude; if None, the built-in STOPWORDS set is used
- mode: default "RGB"; the background is transparent when mode is "RGBA" and background_color is None
- relative_scaling: default 0.5; how strongly word frequency influences font size
- color_func: function that generates the colour of each word; if None, self.color_func is used
- regexp uses regular expressions to separate input text
- collocations: whether to include collocations (pairs of two words)
- colormap: default "viridis"; randomly assigns a colour to each word; ignored if color_func is specified
- process_text(text) Separates long text and removes masked words
- recolor([random_state, color_func, colormap]): recolours an existing output; this is much faster than regenerating the entire word cloud.
to_array() to numpy array
to_file(filename) output to file