import numpy as np
import librosa
from sklearn.metrics.pairwise import cosine_similarity
# 1. Preprocess audio
def preprocess_audio(file_path, *, sr=None, n_mfcc=13):
    """Load an audio file and return its MFCC feature matrix.

    Args:
        file_path: Path of the audio file, forwarded to ``librosa.load``.
        sr: Target sampling rate forwarded to ``librosa.load``. The default
            ``None`` is what this function always passed before, so existing
            callers are unaffected. Pass one fixed rate for every recording
            when their features are going to be compared with each other.
        n_mfcc: Number of MFCC coefficients to compute (default 13, the
            previously hard-coded value).

    Returns:
        The array returned by ``librosa.feature.mfcc``.
    """
    samples, sample_rate = librosa.load(file_path, sr=sr)
    # Use the rate reported by the loader so the MFCC computation always
    # matches the samples it is given, whether or not resampling happened.
    return librosa.feature.mfcc(y=samples, sr=sample_rate, n_mfcc=n_mfcc)
# 2. Compare audio features against the reference features
def _unit_frame_sum(mfcc, label):
    """Return (sum of L2-normalised frames, frame count) for one MFCC matrix."""
    frames = np.asarray(mfcc, dtype=np.float64)
    if frames.ndim != 2 or frames.size == 0:
        raise ValueError(
            f"{label} must be a non-empty 2-D array of shape "
            f"(n_coefficients, n_frames), got shape {frames.shape}"
        )
    if not np.isfinite(frames).all():
        raise ValueError(f"{label} contains NaN or infinite values")
    # One norm per frame (frames are the columns of the matrix).
    norms = np.linalg.norm(frames, axis=0)
    # An all-zero frame has no direction; dividing by 1 keeps it zero so it
    # contributes a similarity of 0 instead of producing NaN.
    norms[norms == 0.0] = 1.0
    return (frames / norms).sum(axis=1), frames.shape[1]


def compare_to_standard(mfcc_input, mfcc_reference):
    """Score two MFCC matrices by their mean pairwise cosine similarity.

    Every frame (column) of ``mfcc_input`` is compared with every frame of
    ``mfcc_reference`` and the cosine similarities are averaged. No temporal
    alignment is performed.

    The mean over all pairs equals the dot product of the two sums of
    unit-length frames divided by the number of pairs, so the full
    (n_input_frames x n_reference_frames) similarity matrix is never built.

    Args:
        mfcc_input: 2-D array, shape (n_coefficients, n_input_frames).
        mfcc_reference: 2-D array, shape (n_coefficients, n_reference_frames).

    Returns:
        The average cosine similarity as a NumPy float in [-1, 1].

    Raises:
        ValueError: If either matrix is empty, not 2-D, contains non-finite
            values, or the two matrices have different coefficient counts.
    """
    input_sum, n_input = _unit_frame_sum(mfcc_input, "mfcc_input")
    reference_sum, n_reference = _unit_frame_sum(mfcc_reference, "mfcc_reference")
    if input_sum.shape != reference_sum.shape:
        raise ValueError(
            "mfcc_input and mfcc_reference must have the same number of "
            f"coefficients, got {input_sum.shape[0]} and {reference_sum.shape[0]}"
        )
    return np.dot(input_sum, reference_sum) / (n_input * n_reference)
# 3. Scoring function
def grade_pronunciation(audio_file, reference_file):
    """Score a spoken recording against a reference recording.

    Both files are run through ``preprocess_audio`` (the input first, then
    the reference) and the resulting feature matrices are handed to
    ``compare_to_standard``.

    Args:
        audio_file: Path of the recording to grade.
        reference_file: Path of the reference recording.

    Returns:
        Whatever ``compare_to_standard`` returns for the two feature matrices.
    """
    features = [preprocess_audio(path) for path in (audio_file, reference_file)]
    return compare_to_standard(*features)
# Usage example: grade one recording against the reference and print the score
audio_file, reference_file = 'input_speech.wav', 'reference_speech.wav'
score = grade_pronunciation(audio_file=audio_file, reference_file=reference_file)
print(f"Pronunciation Score: {score:.2f}")
import scrapy
from scrapy.http import Request
from scrapy.utils.project import get_project_settings
class MouserSpider(scrapy.Spider):
    """Spider that walks the Mouser electronic-components listing pages.

    Each listing page yields one item per product entry, then the spider
    follows the "next page" link until none is found. Every request, both
    the initial ones and the pagination ones, carries the proxy configured
    in the ``HTTP_PROXY`` setting.
    """

    name = 'mouser'
    allowed_domains = ['mouser.cn']
    start_urls = ['https://www.mouser.cn/electronic-components/']

    def _proxy_meta(self):
        """Return the request ``meta`` dict carrying the ``HTTP_PROXY`` setting."""
        # Prefer the settings bound to this spider by its crawler; fall back
        # to the project settings when the spider is used without one.
        settings = getattr(self, 'settings', None)
        if settings is None:
            settings = get_project_settings()
        return {'proxy': settings.get('HTTP_PROXY')}

    def parse(self, response):
        """Extract product entries from a listing page and follow pagination.

        Yields:
            A dict with ``name``, ``price`` and ``link`` for every element
            matching ``.search-results .result-item``; each value is ``None``
            when its selector matches nothing. If a ``a.pagination-next``
            link exists, a request for the next page is yielded afterwards.
        """
        # Parse component information
        for product in response.css('.search-results .result-item'):
            yield {
                'name': product.css('.result-title::text').get(),
                'price': product.css('.result-price::text').get(),
                'link': product.css('a::attr(href)').get(),
            }

        # Pagination
        next_page = response.css('a.pagination-next::attr(href)').get()
        if next_page:
            # Request meta is not inherited by follow-up requests, so the
            # proxy has to be attached again or later pages bypass it.
            yield Request(
                url=response.urljoin(next_page),
                callback=self.parse,
                meta=self._proxy_meta(),
            )

    def start_requests(self):
        """Yield the initial requests for ``start_urls`` through the proxy."""
        for url in self.start_urls:
            yield Request(url=url, callback=self.parse, meta=self._proxy_meta())