# -*- coding:utf-8 -*-
from fake_useragent import UserAgent
from lxml import etree
import pandas as pd
import requests
import time
import os
import xlrd
import chardet
import traceback
def getColumnIndex(table, columnName):
    # Return the index of the column whose header (row 0) matches columnName
    columnIndex = None
    for i in range(table.ncols):
        if table.cell_value(0, i) == columnName:
            columnIndex = i
            break
    return columnIndex
def readExcelDataByName(fileName, sheetName):
    # Open a workbook and return (sheet, errorMsg); sheet is None on failure
    table = None
    errorMsg = ""
    try:
        data = xlrd.open_workbook(fileName)
        table = data.sheet_by_name(sheetName)
    except Exception as msg:
        errorMsg = msg
    return table, errorMsg
def readExcelDataByIndex(fileName, sheetIndex):
    # Same as readExcelDataByName, but looks the sheet up by index
    table = None
    errorMsg = ""
    try:
        data = xlrd.open_workbook(fileName)
        table = data.sheet_by_index(sheetIndex)
    except Exception as msg:
        errorMsg = msg
    return table, errorMsg
#def get_all_page(url, page, headers, proxies):
def get_all_page(url, headers, proxies):
    # Scrapes one listing page; the XPath expressions and equip_list are
    # module-level globals read from the Excel sheet in __main__.
    # Print the current page number
    # print('====================================================================================================')
    # print('========================================page:[', page ,']==================================================')
    # print('====================================================================================================')
    html_code = requests.get(url, headers=headers, proxies=proxies).text
    print(RootPath)
    # Product list on the current page
    equip_items = etree.HTML(html_code).xpath(RootPath)
    print(equip_items)
    print(len(equip_items))
    for item in equip_items:
        # Detailed product information
        try:
            title = item.xpath(TitlePath)[0]
            print(title)
            price = item.xpath(PricePath)[0] if item.xpath(PricePath) else 0
            print(price)
            link = 'https://www.amazon.cn/' + item.xpath(LinkPath)[0]
            print(link)
            stars = float(item.xpath(StarsPath)[0][:3].replace(',', '')) if item.xpath(StarsPath) else 0
            #print(stars)
            follows = int(item.xpath(FollowPath)[0].replace(',', '')) if item.xpath(FollowPath) else 0
            #print(follows)
            equip_list.append([title, price, stars, follows, link])
            print(f'Title: {title}  Price: {price}  Stars: {stars}  Follows: {follows}  Link: {link}')
        except Exception:
            continue
    # Get the link to the next page
    # next_page = etree.HTML(html_code).xpath(PagePath)
    # next_page_link = 'https://www.amazon.cn/' + next_page[0] if next_page else ''
    #
    # # If a next page exists, keep scraping
    # if next_page_link:
    #     if page < 11:
    #         page += 1
    #         get_all_page(next_page_link, page, headers, proxies)
if __name__ == '__main__':
    xlsfile = 'C:/Users/nikki/Desktop/excel/example.xlsx'
    table = readExcelDataByName(xlsfile, 'Sheet1')[0]
    line = 2
    # Read the XPath configuration from row `line` of the sheet
    url = table.cell_value(line, getColumnIndex(table, 'url'))
    RootPath = table.cell_value(line, getColumnIndex(table, 'RootPath'))
    TitlePath = table.cell_value(line, getColumnIndex(table, 'TitlePath'))
    PricePath = table.cell_value(line, getColumnIndex(table, 'PricePath'))
    LinkPath = table.cell_value(line, getColumnIndex(table, 'LinkPath'))
    StarsPath = table.cell_value(line, getColumnIndex(table, 'StarsPath'))
    FollowPath = table.cell_value(line, getColumnIndex(table, 'FollowPath'))
    PagePath = table.cell_value(line, getColumnIndex(table, 'PagePath'))
    print(url)
    # Request configuration
    #url = 'https://www.amazon.cn/s?rh=n%3A42459071&brr=1&rd=1&ref=sa_menu_softwa_l2_b42459071'
    ua = UserAgent()
    headers = {
        'User-Agent': ua.random
    }
    proxies = {
        # requests expects lowercase scheme keys in the proxies dict
        'https': 'http://182.99.154.21:4235'
    }
    equip_list = []
    # Entry point of the scraper
    #get_all_page(url, page=1, headers=headers, proxies=proxies)
    get_all_page(url, headers=headers, proxies=proxies)
    # Collect the results into a DataFrame
    equips_df = pd.DataFrame(equip_list, columns=['title', 'price', 'stars', 'follows', 'link'])
    # Create the output folder if it does not exist
    if not os.path.isdir('output'):
        os.mkdir('output')
    # Write the DataFrame to csv
    # equips_df.sort_values('follows', ascending=False).to_csv(f'output/equip_follows_rank.csv', sep=',', na_rep='NA', index=False)

The error is raised at line 93 of the script. The exception returned is:

url = table.cell_value(line, getColumnIndex(table, 'url'))
AttributeError: 'NoneType' object has no attribute 'cell_value'
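
That traceback means readExcelDataByName returned table = None, i.e. xlrd.open_workbook or sheet_by_name raised an exception that the helper currently swallows into errorMsg. A minimal sketch for surfacing that swallowed error, assuming the same xlsfile path and helper functions defined above:

xlsfile = 'C:/Users/nikki/Desktop/excel/example.xlsx'
table, errorMsg = readExcelDataByName(xlsfile, 'Sheet1')
if table is None:
    # errorMsg holds the exception that left table as None
    # (e.g. a bad path, a wrong sheet name, or an xlrd version that no longer opens .xlsx)
    print('Could not open the sheet:', errorMsg)
else:
    line = 2
    url = table.cell_value(line, getColumnIndex(table, 'url'))
    print(url)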