AI基础应用篇
AI基础应用篇1.自然语言处理NLP1.1任务:1.自然语言理解(NLU)将给定自然语言输入映射为有用的表示分析语言的不同方面2.自然语言生成(NLG)文字规划:知识库中检索句子规划:选择适合单词,短语,句子语气文本实现:句子结构1.2NLP术语音韵,形态 , 语素 , 语法,语义 , 语用学 , 话语 , 世界知识1.3NLP步骤词法分析:分解段落,句子,单词。句法分析:句法语义分析:含义是否符
AI基础应用篇
1.自然语言处理NLP
1.1任务:
1.自然语言理解(NLU)
将给定自然语言输入映射为有用的表示
分析语言的不同方面
2.自然语言生成(NLG)
文字规划:知识库中检索
句子规划:选择适合单词,短语,句子语气
文本实现:句子结构
1.2NLP术语
音韵 ,形态 , 语素 , 语法, 语义 , 语用学 , 话语 , 世界知识
1.3NLP步骤
词法分析:分解段落,句子,单词。
句法分析:句法
语义分析:含义是否符合
话语整合:与前一个句子联系进行分析
语用分析:实际意义
1.4NLP工具包
导入NLTK
import nltk
# NOTE(review): called with no arguments this is expected to open NLTK's
# interactive downloader for corpora/models — confirm against the NLTK docs.
nltk.download()
import gensim
import pattern
1.标记化,将文本分割成句子,单词,标点符号
from nltk.tokenize import sent_tokenize  # split text into sentences
from nltk.tokenize import word_tokenize  # split text into words
# The class is spelled WordPunctTokenizer (capital T); the previous spelling
# raised ImportError.
from nltk.tokenize import WordPunctTokenizer  # split text into words and punctuation
2.词干
from nltk.stem.porter import PorterStemmer
# The class is spelled LancasterStemmer (double "m"); the previous spelling
# raised ImportError.
from nltk.stem.lancaster import LancasterStemmer
from nltk.stem.snowball import SnowballStemmer
词形还原
from nltk.stem import WordNetLemmatizer
例子:
import nltk
# POS-tagged input: a list of (word, tag) pairs.
sentence = [
    ("a", "DT"), ("clever", "JJ"), ("fox", "NN"), ("was", "VBP"),
    ("jumping", "VBP"), ("over", "IN"), ("the", "DT"), ("wall", "NN"),
]
# Chunk grammar: a noun phrase (NP) is an optional determiner, followed by
# any number of adjectives, followed by a noun.
grammar = "NP:{<DT>?<JJ>*<NN>}"
# Build a regular-expression chunk parser from the grammar.
parser_chunking = nltk.RegexpParser(grammar)
# Parse once and keep the tree (the sentence used to be parsed twice, with the
# first result thrown away).
Output_chunking = parser_chunking.parse(sentence)
# Show the resulting chunk tree.
Output_chunking.draw()
1.5词袋模型(BOW)
用于从文本提取特征,用于建模,机器学习
#词袋模型
from sklearn.feature_extraction.text import CountVectorizer
# The documents whose words will be counted.
documents = [
    'We are using the code of Website Copy',
    'We are still find the secert of code.',
]
# Fit the bag-of-words vectorizer on the documents and build the count matrix.
bow_vectorizer = CountVectorizer()
term_counts = bow_vectorizer.fit_transform(documents)
# Print the vocabulary learned by the vectorizer.
print(bow_vectorizer.vocabulary_)
1.6预测新闻类型
from sklearn.datasets import fetch_20newsgroups
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
# Newsgroup category name -> readable label used when printing predictions.
category_map = {
    'talk.religion.misc': 'Religion',
    'rec.autos': 'Autos',
    'rec.sport.hockey': 'Hockey',
    'sci.electronics': 'Electronics',
    'sci.space': 'Space',
}
# Load the training split restricted to the categories above.
training_data = fetch_20newsgroups(
    subset='train',
    categories=category_map.keys(),
    shuffle=True,
    random_state=5,
)

# Turn the raw posts into term counts, then re-weight them with TF-IDF.
count_vectorizer = CountVectorizer()
tfidf_transformer = TfidfTransformer()
train_counts = count_vectorizer.fit_transform(training_data.data)
train_tfidf = tfidf_transformer.fit_transform(train_counts)

# Sentences to classify.
input_data = [
    'Discovery was a space shuttle',
    'Hindu, Christian, Sikh all are religions',
    'We must have to drive safely',
    'Puck is a disk made of rubber',
    'Television, Microwave, Refrigrated all uses electricity'
]

# Train a multinomial Naive Bayes classifier on the TF-IDF features.
nb_classifier = MultinomialNB()
nb_classifier.fit(train_tfidf, training_data.target)

# Run the new sentences through the same (already fitted) transformers.
input_counts = count_vectorizer.transform(input_data)
input_tfidf = tfidf_transformer.transform(input_counts)

# Predict and report the readable label for every sentence.
predictions = nb_classifier.predict(input_tfidf)
for sentence, label_index in zip(input_data, predictions):
    label = category_map[training_data.target_names[label_index]]
    print("inputdata:", sentence, '\n Category:', label)
1.7通过名字预测性别
#NLP根据名字判断性别
import random
from nltk import NaiveBayesClassifier #朴素贝叶斯分类器
from nltk.classify import accuracy as nltk_accuracy
from nltk.corpus import names #从文集中导入数据名字
#提取字母作为特征
def extract_features(word, N=2):
    """Return a feature dict holding the lower-cased last ``N`` letters of ``word``.

    Args:
        word: The name to take the suffix from.
        N: Number of trailing letters to keep. When ``word`` is shorter than
            ``N`` the whole word is used; ``N <= 0`` yields an empty suffix.

    Returns:
        A dict with the single key ``'feature'`` mapped to the suffix.
    """
    # ``word[-0:]`` is the whole word, so a non-positive N is handled
    # explicitly instead of being passed to the slice.
    last_n_letters = word[-N:] if N > 0 else ''
    return {'feature': last_n_letters.lower()}
if __name__ == '__main__':
    # Build the labeled data set from the two files of the NLTK names corpus.
    male_list = [(name, 'male') for name in names.words("male.txt")]
    # Female names must carry the 'female' label; they used to be labeled
    # 'male', which left the classifier with a single class.
    female_list = [(name, 'female') for name in names.words("female.txt")]
    data = male_list + female_list

    # Fixed seed so the shuffle (and therefore the split below) is repeatable.
    random.seed(5)
    random.shuffle(data)

    # Names classified after every training run. A single name is now used for
    # this list; it was defined as `nameinput` but read as `namesInput`.
    names_input = ['Bob', 'Alice', 'Smith']

    # The first 80% of the shuffled data trains the model, the rest tests it.
    train_sample = int(0.8 * len(data))

    # Try suffix lengths 1..5 and report the accuracy of each.
    for i in range(1, 6):
        print('\nNumber of end letters:', i)
        features = [(extract_features(n, i), gender) for (n, gender) in data]
        train_data, test_data = features[:train_sample], features[train_sample:]
        classifier = NaiveBayesClassifier.train(train_data)
        accuracy_classifier = round(100 * nltk_accuracy(classifier, test_data), 2)
        print('Accuracy = ' + str(accuracy_classifier) + '%')
        for name in names_input:
            print(name, '==>', classifier.classify(extract_features(name, i)))
1.8主题建模
揭示给定文档集合中抽象主题或隐藏结构的技术
文本分类,推荐系统
算法:
潜在狄利克雷分配(LDA):使用概率图模型进行主题建模,gensim包包括LDA
潜在语义分析(LSA):基于线性代数,对文档-术语矩阵使用SVD(奇异值分解)
非负矩阵分解(NMF)
使用:将主题数量作为参数,将文档-词汇矩阵输入,将WTM(词主题矩阵)和TDM(主题文档矩阵)输出
2.分析时间序列数据
使用pandas库对时间序列数据进行处理
其他库:hmmlearn 隐马尔可夫模型(HMM)算法
pystruct 条件随机场(CRF),最大间隔马尔可夫网络(M3N),结构化支持向量机
cvxopt 凸优化
2.1处理时间序列
#使用pandas 进行时间序列数据的处理,画图
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
def read_data(input_file):
    """Load the third column of a numeric text file as a monthly time series.

    Args:
        input_file: File name or file-like object accepted by ``numpy.loadtxt``;
            values are whitespace-separated and each row needs at least
            three columns.

    Returns:
        A ``pandas.Series`` holding column index 2 of the file, indexed by
        consecutive month-end dates starting with January 1950 (one per row).
    """
    # ndmin=2 keeps a single-row file two-dimensional so the column slice
    # below works for it as well.
    input_data = np.loadtxt(input_file, delimiter=None, ndmin=2)
    # Use the MonthEnd offset object rather than the 'M' string alias, which
    # newer pandas releases deprecate in favor of 'ME'; the dates are the same.
    dates = pd.date_range(
        '1950-01',
        periods=input_data.shape[0],
        freq=pd.offsets.MonthEnd(),
    )
    output = pd.Series(input_data[:, 2], index=dates)
    return output
if __name__ == '__main__':
    # Read the series from the text file in the working directory and plot it.
    series = read_data('NLP_month_time.txt')
    plt.figure()
    series.plot()
    plt.show()
2.2隐马模型
状态S:隐藏状态
输出符号O:可能输出符号
状态转移概率矩阵A:从一个状态过渡到另一个状态的可能性
观察发射概率矩阵B:在特定状态下发射某个观测符号的可能性。
先验概率矩阵Π :从系统的各种状态开始处于特定状态的概率
3.语音识别
3.1构建语音识别器(ASR)
难点:词汇大小,信道特性(频率,带宽)说话模式(独立词,连接词)口语风格,扬声器依赖,噪音类型,麦克风特性
3.2口语识别
使用Google Speech进行实现
安装PyAudio,SpeechRecognition Google-Speech
import speech_recognition as sr
recognizer = sr.Recognizer()
# Record one utterance from the microphone.
with sr.Microphone() as mic:
    # Calibrate against the background noise before listening.
    recognizer.adjust_for_ambient_noise(mic)
    print("Please say something")
    audio = recognizer.listen(mic)
# Send the recording to Google's recognizer; any failure is printed, not raised.
try:
    transcript = recognizer.recognize_google(audio)
    print("You said\n " + transcript)
except Exception as e:
    print(e)
4.简单AI游戏
策略:组合搜索,MiniMax,Alpha-Beta修剪,Negamax算法
使用easyAI库,通过继承TwoPlayersGame类实现
需要重写的函数有
__init__(self,players)
is_over() 返回布尔值
possible_moves() 返回数组
make_move(self,move)
unmake_move(self,move) #可选,用于撤销make_move所做的移动,可加速AI搜索
show()# 每步游戏的显示内容
scoring() 每步的得分
4.1井字游戏
使用了Negamax算法
#井字游戏
from easyAI import TwoPlayersGame,AI_Player,Negamax
from easyAI.Player import Human_Player
class TicTacToe_game(TwoPlayersGame):
    """Tic-tac-toe game definition for easyAI.

    The board is a flat list of nine cells (0 = empty, 1 / 2 = the player who
    marked it). Moves are the 1-based cell numbers 1..9, row by row.
    """

    # The eight winning lines as 1-based cell numbers: rows, columns, diagonals.
    _WINNING_LINES = (
        (1, 2, 3), (4, 5, 6), (7, 8, 9),
        (1, 4, 7), (2, 5, 8), (3, 6, 9),
        (1, 5, 9), (3, 5, 7),
    )

    def __init__(self, players):
        """Store the players and start with an empty board, player 1 to move."""
        self.players = players
        self.nplayer = 1
        self.board = [0] * 9

    def possible_moves(self):
        """Return the 1-based numbers of all empty cells."""
        return [x + 1 for x, y in enumerate(self.board) if y == 0]

    def make_move(self, move):
        """Mark cell ``move`` (1-based) for the current player."""
        # int(move) - 1 also accepts a move given as a digit string.
        self.board[int(move) - 1] = self.nplayer

    def unmake_move(self, move):
        """Clear cell ``move`` (1-based), undoing a previous ``make_move``."""
        self.board[int(move) - 1] = 0

    # Kept for backward compatibility: the method used to be defined under
    # this misspelled name, which easyAI never looks up.
    umake_move = unmake_move

    def condition_for_lose(self):
        """Return True when the opponent occupies a complete winning line."""
        return any(
            all(self.board[z - 1] == self.nopponent for z in line)
            for line in self._WINNING_LINES
        )

    def is_over(self):
        """Return True when the board is full or the current player has lost."""
        return not self.possible_moves() or self.condition_for_lose()

    def show(self):
        """Print the board as three rows; '.' is empty, 'O' player 1, 'X' player 2."""
        print('\n' + '\n'.join([' '.join([['.', 'O', 'X'][self.board[3 * j + i]]
                                          for i in range(3)]) for j in range(3)]))

    def scoring(self):
        """Return -100 when the current player has lost, otherwise 0."""
        return -100 if self.condition_for_lose() else 0
if __name__ == '__main__':
    # A human (listed first) plays against an AI driven by Negamax(7).
    ai_algo = Negamax(7)
    players = [Human_Player(), AI_Player(ai_algo)]
    TicTacToe_game(players).play()
4.2取硬币游戏
使用TT
#拿硬币游戏
from easyAI import TwoPlayersGame,id_solve,Human_Player,AI_Player,Negamax
from easyAI.AI import TT
class LastCoin_game(TwoPlayersGame):
    """Coin-pile game for easyAI.

    The pile starts with 15 coins and every move removes between 1 and
    ``max_move`` coins; the game ends once no coins are left.
    """

    def __init__(self, players):
        """Store the players and set up the pile; player 1 moves first."""
        self.players = players
        self.nplayer = 1
        self.num_coins = 15
        self.max_move = 4

    def possible_moves(self):
        """Return the allowed moves '1' .. str(max_move) as strings."""
        return list(map(str, range(1, self.max_move + 1)))

    def make_move(self, move):
        """Remove ``move`` coins from the pile."""
        self.num_coins = self.num_coins - int(move)

    def win_game(self):
        """Return True when the pile is exhausted."""
        return not self.num_coins > 0

    def is_over(self):
        """The game is over exactly when the pile is exhausted."""
        return self.win_game()

    def scoring(self):
        """Return 100 once the pile is exhausted, otherwise 0."""
        if self.win_game():
            return 100
        return 0

    def show(self):
        """Print the number of coins left in the pile."""
        print("剩余硬币:", self.num_coins)
if __name__ == '__main__':
    # Positions are keyed in the transposition table by the remaining coins.
    LastCoin_game.ttentry = lambda self: self.num_coins
    tt = TT()
    # Solve the game by iterative deepening over depths 2..19, filling the table.
    result, depth, move = id_solve(LastCoin_game, range(2, 20), win_score=100, tt=tt)
    print(result, depth, move)
    # The AI (playing from the filled table) is listed first, the human second.
    LastCoin_game([AI_Player(tt), Human_Player()]).play()
5.神经网络
导入NeuroLab库
步骤,获取数据,训练集,测试集,标签
构建神经网络(map)
nl.net.newff([[min,max]],[10,6,1])
或nl.net.newp
训练,error=map.train(data,target,epochs=1000,show=100,goal=0.01)
5.1简单神经网络
import matplotlib.pylab as plt
import neurolab as nl
# Training set: two binary inputs per sample; the target is 1 only for [1, 1].
data = [[1, 0], [0, 1], [0, 0], [1, 1]]
target = [[0], [0], [0], [1]]
# Perceptron with two inputs, each ranging over [0, 1], and one neuron.
input_ranges = [[0, 1], [0, 1]]
perceptron = nl.net.newp(input_ranges, 1)
error_history = perceptron.train(data, target, epochs=100, show=10, lr=0.1)
# Plot the values returned by train() against the epoch number.
plt.figure()
plt.plot(error_history)
plt.xlabel("Numbers of epochs")
plt.ylabel("rating of error")
plt.grid()
plt.show()
5.2单层神经网络
import matplotlib.pyplot as plt
import numpy as np
import neurolab as nl
# Each row holds two feature values followed by two 0/1 label values.
# (Named input_data rather than `input` so the builtin is not shadowed.)
input_data = np.array([[2. , 4. , 0. , 0. ],
                       [1.5, 3.9, 0. , 0. ],
                       [2.2, 4.1, 0. , 0. ],
                       [1.9, 4.7, 0. , 0. ],
                       [5.4, 2.2, 0. , 1. ],
                       [4.3, 7.1, 0. , 1. ],
                       [5.8, 4.9, 0. , 1. ],
                       [6.5, 3.2, 0. , 1. ],
                       [3. , 2. , 1. , 0. ],
                       [2.5, 0.5, 1. , 0. ],
                       [3.5, 2.1, 1. , 0. ],
                       [2.9, 0.3, 1. , 0. ],
                       [6.5, 8.3, 1. , 1. ],
                       [3.2, 6.2, 1. , 1. ],
                       [4.9, 7.8, 1. , 1. ],
                       [2.1, 4.8, 1. , 1. ]])
# Columns 0-1 are the features, columns 2-3 the labels. The labels used to be
# sliced with [:, :2], which selected the feature columns again.
data = input_data[:, 0:2]
target = input_data[:, 2:]

# Scatter plot of the raw feature points.
plt.figure()
plt.scatter(data[:, 0], data[:, 1])

# The network constructor takes the [min, max] range of every input dimension.
minx, maxx = data[:, 0].min(), data[:, 0].max()
miny, maxy = data[:, 1].min(), data[:, 1].max()
dimx = [minx, maxx]
dimy = [miny, maxy]
# One output neuron per label column.
layer = target.shape[1]

# Single-layer perceptron (named `net` so the builtin `map` is not shadowed).
net = nl.net.newp([dimx, dimy], layer)
# lr is the learning rate used by the training rule.
error = net.train(data, target, epochs=200, show=20, lr=0.01)

# Plot the training error per epoch.
plt.figure()
plt.plot(error)
plt.xlabel("Numbers of epochs")
plt.ylabel("rating of error")
plt.grid()
plt.show()

# Run the trained network on a few unseen points.
data_test = [[1.5, 3.2], [3.6, 1.7], [3.6, 5.7], [1.6, 3.9]]
print(net.sim(data_test))
5.3多层神经网络
import matplotlib.pyplot as plt
import numpy as np
import neurolab as nl
# Sample y = 2*x^2 + 9 at `nums` evenly spaced points in [min_val, max_val].
min_val, max_val = -30, 30
nums = 160
x = np.linspace(min_val, max_val, nums)
y = 2 * np.square(x) + 9
# Scale the targets by their vector norm.
y = y / np.linalg.norm(y)

# Reshape both 1-D arrays into (nums, 1) columns for training.
data = x.reshape(nums, 1)
target = y.reshape(nums, 1)

# Plot the training points.
plt.figure()
plt.scatter(data, target)
plt.title("date point")

# Feed-forward network with layers of 10, 6 and 1 neurons, trained by
# gradient descent.
net = nl.net.newff([[min_val, max_val]], [10, 6, 1])
net.trainf = nl.train.train_gd
error = net.train(data, target, epochs=1000, show=20, goal=0.01)

# Network output on the training inputs, flattened back to 1-D.
output = net.sim(data)
y_pred = output.reshape(nums)

# Plot the training error per epoch.
plt.figure()
plt.plot(error)
plt.xlabel("Numbers of epochs")
plt.ylabel("rating of error")
plt.title("Error rating")
plt.grid()

# Evaluate on a grid twice as dense as the training grid and plot the curve.
x_test = np.linspace(min_val, max_val, nums * 2)
test_inputs = x_test.reshape(x_test.size, 1)
y_test = net.sim(test_inputs).reshape(x_test.size)
plt.figure()
plt.plot(x_test, y_test)
plt.title("test")
plt.show()
6.计算机视觉
低级视觉:特征提取
中级视觉:三维场景,物体识别
高级视觉:场景中行为,活动,意图识别
使用opencv库
import cv2
img = cv2.imread("picture.jpg")
# imread returns None instead of raising when the file cannot be read.
if img is None:
    raise FileNotFoundError("picture.jpg could not be read")
# imshow requires a window name as its first argument.
cv2.imshow("original", img)
# Convert the BGR image to grayscale.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Save the grayscale image and display it.
cv2.imwrite("gray.jpg", gray)
cv2.imshow("gray", gray)
# Keep the windows open until a key is pressed, then close them all.
cv2.waitKey(0)
cv2.destroyAllWindows()
人脸识别,眼睛识别
使用Haar级联分类器
import cv2
import numpy as np
# Load the Haar cascade for frontal faces from the working directory.
detection = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
img = cv2.imread('faces.jpg')
# imread returns None instead of raising when the file cannot be read.
if img is None:
    raise FileNotFoundError("faces.jpg could not be read")
# The function is cv2.cvtColor and the color code lives in the cv2 namespace.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Arguments after the image: scale factor 1.3 and 5 minimum neighbors.
faces = detection.detectMultiScale(gray, 1.3, 5)
# Draw a rectangle of thickness 3 around every detected face.
for (x, y, w, h) in faces:
    img = cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 3)
# Show the annotated image until a key is pressed, then close the window.
cv2.imshow("faces", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
识别眼睛,使用haarcascade_eye.xml即可
更多推荐
所有评论(0)