请稍等 ...
×

采纳答案成功!

向帮助你的同学说点啥吧!感谢那些助人为乐的人

我的代码中,没有办法 读入为空 返回也是空

# -*- coding:utf-8 -*-

from fake_useragent import UserAgent
from lxml import etree
import pandas as pd
import requests
import time
import os
import xlrd
import chardet
import traceback

def getColumnIndex(table, columnName):
    columnIndex = None
    #print table
    for i in range(table.ncols):
        #print columnName
        #print table.cell_value(0, i)
        if(table.cell_value(0, i) == columnName):
            columnIndex = i
            break
    return columnIndex
def readExcelDataByName(fileName, sheetName):
    #print fileName
    table = None
    errorMsg = ""
    try:
        data = xlrd.open_workbook(fileName)
        table = data.sheet_by_name(sheetName)
    except Exception as msg:
        errorMsg = msg
    return table, errorMsg

def readExcelDataByIndex(fileName, sheetIndex):
    table = None
    errorMsg = ""
    try:
        data = xlrd.open_workbook(fileName)
        table = data.sheet_by_index(sheetIndex)
    except Exception as msg:
        errorMsg = msg
    return table, errorMsg

#def get_all_page(url, page, headers, proxies):
def get_all_page(url, headers, proxies):
    # 打印当前页数
    # print('====================================================================================================')
    # print('========================================page:[', page ,']==================================================')
    # print('====================================================================================================')

    html_code = requests.get(url, headers=headers, proxies=proxies).text
    print(RootPath)
    # 单页中设备列表
    equip_items = etree.HTML(html_code).xpath(RootPath)
    print(equip_items)
    print(len(equip_items))
    for item in equip_items:
        # 设备详细信息

        try:
            title = item.xpath(TitlePath)[0]
            print(title)
            price = item.xpath(PricePath)[0] if item.xpath(PricePath) else 0
            print(price)
            link = 'https://www.amazon.cn/' + item.xpath(LinkPath)[0]
            print(link)
            stars = float(item.xpath(StarsPath)[0][:3].replace(',', '')) if item.xpath(StarsPath) else 0
            #print(stars)
            follows = int(item.xpath(FollowPath)[0].replace(',', '')) if item.xpath(FollowPath) else 0
            #print(follows)
            equip_list.append([title, price, stars, follows, link])
            print(f'商品名称:{title} 价格:{price} 评分:{stars} 收藏:{follows} 链接:{link}')
        except:
            continue
    # 获取下一页链接
    # next_page = etree.HTML(html_code).xpath(PagePath)
    # next_page_link = 'https://www.amazon.cn/' + next_page[0] if next_page else ''
    #
    # # 若存在下一页继续爬取
    # if next_page_link:
    #     if page < 11:
    #         page += 1
    #         get_all_page(next_page_link, page, headers, proxies)



if __name__ == '__main__':

    xlsfile = 'C:/Users/nikki/Desktop/excel/example.xlsx'
    table = readExcelDataByName(xlsfile, 'Sheet1')[0]
    line = 2
    # 获取第line行的值
    url = table.cell_value(line, getColumnIndex(table, 'url'))
    RootPath = table.cell_value(line, getColumnIndex(table, 'RootPath'))
    TitlePath = table.cell_value(line, getColumnIndex(table, 'TitlePath'))
    PricePath = table.cell_value(line, getColumnIndex(table, 'PricePath'))
    LinkPath = table.cell_value(line, getColumnIndex(table, 'LinkPath'))
    StarsPath = table.cell_value(line, getColumnIndex(table, 'StarsPath'))
    FollowPath = table.cell_value(line, getColumnIndex(table, 'FollowPath'))
    PagePath = table.cell_value(line, getColumnIndex(table, 'PagePath'))
    print(url)
    # 网页请求配置
    #url = 'https://www.amazon.cn/s?rh=n%3A42459071&brr=1&rd=1&ref=sa_menu_softwa_l2_b42459071'
    ua = UserAgent()
    headers = {
        'User-Agent': UserAgent().random
    }
    proxies = {
        'HTTPS': '182.99.154.21:4235'
    }

    equip_list = []

    # 爬取函数入口
    #get_all_page(url, page=1, headers=headers, proxies=proxies)
    get_all_page(url, headers=headers, proxies=proxies)
    # 结果保存为 DataFrame
    equips_df = pd.DataFrame(equip_list, columns=['title', 'price', 'stars', 'follows', 'link'])

    # 不存在则创建output文件夹
    if not os.path.isdir('output'):
        os.mkdir('output')

    # DataFrame结果输出到csv
   # equips_df.sort_values('follows', ascending=False).to_csv(f'output/equip_follows_rank.csv', sep=',',na_rep='NA', index=False)

是在第93行
异常返回的是:
url = table.cell_value(line, getColumnIndex(table, ‘url’))
AttributeError: ‘NoneType’ object has no attribute ‘cell_value’

正在回答 回答被采纳积分+3

1回答

bobby 2019-11-12 10:57:34

从这个报错来看是table是none造成的,所以你要看看为什么读取excel没有读取到sheet,是没有sheet1这个标签是不是大小写写错了?

0 回复 有任何疑惑可以回复我~
  • 提问者 左光斗 #1
    是没有这个标签吧应该
    我预想的是 有就返回值
    没有就反回空
    应该如何改呢?
    回复 有任何疑惑可以回复我~ 2019-11-12 14:43:33
  • bobby 回复 提问者 左光斗 #2
    readExcelDataByName这个函数是你自己定义的啊,你需要怎么返回就怎么返回啊 还不是由你自己确定啊
    回复 有任何疑惑可以回复我~ 2019-11-13 16:28:21
问题已解决,确定采纳
还有疑问,暂不采纳
意见反馈 帮助中心 APP下载
官方微信