说明

一个 URL 处理函数非常耗时，导致 nginx 在等待响应的过程中超时。改为用多线程（threading.Thread）并发处理每个 URL 之后，问题解决。下面记录相关的代码片段。

代码

# Calling code (an excerpt; the enclosing function is not shown here).
# For every object in the queryset: parse the links found at obj.ilink, start
# one thread per link whose domain is not excluded, mark the object as
# processed, then wait before moving on to the next object.
 for obj in queryset:
            res1 = parse_domains(obj.ilink)
            if 'href_domains' in res1:
                threads = []
                for url in res1['href_domains']:
                    # Third '/'-separated piece; for "scheme://host/path" this
                    # is the host. NOTE(review): raises IndexError when the URL
                    # contains fewer than two '/' characters.
                    domain = url.split('/')[2]
                    # Skip sites that already exist
                    if domain in exclude_urls:
                        continue
                    t = threading.Thread(target=processing_url_func, args=(url,))
                    threads.append(t)

                # Start every worker. The threads are never joined in this
                # excerpt, so the loop does not wait for them to finish.
                for p in threads:
                    p.start()

            obj.status = 0
            obj.save()

            # Pause for 4 seconds
            time.sleep(4)

# Worker function (run as a thread target, not a subprocess)
def processing_url_func(url):
    """Fetch the metadata of ``url`` and store it as a new ``iLinks`` row.

    Calls ``get_web_info(url)``; when that yields a ``[title, note]`` pair,
    one ``iLinks`` record is created with ``title``, ``note`` and
    ``home=url``. Nothing is created when ``get_web_info`` returns a falsy
    value.

    Args:
        url: Address of the site to process.

    Returns:
        None.
    """
    info = get_web_info(url)
    if not info:
        return

    # objects.create() already persists the new row, so a follow-up save()
    # would only issue a second, redundant write.
    iLinks.objects.create(title=info[0], note=info[1], home=url)

# Fetch title/keywords information for a URL
def get_web_info(url, timeout=10):
    """Download ``url`` and extract its page title and meta keywords.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block indefinitely and the
            ``ReadTimeout`` handler below would never be reached.

    Returns:
        ``[title, note]`` when both the ``<title>`` text and the ``content``
        of the element with ``name="keywords"`` are non-empty; ``False``
        otherwise, including when the request fails.
    """
    req_header = {'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'}

    print(url)

    try:
        # Assumes sessions() returns a requests Session, which works as a
        # context manager and is closed on exit — TODO confirm.
        with sessions() as session:
            # NOTE(review): verify=False turns off TLS certificate
            # verification; kept as in the original, but confirm that this
            # is acceptable for the sites being fetched.
            resp = session.get(
                url,
                headers=req_header,
                allow_redirects=True,
                verify=False,
                timeout=timeout,
            )
    except (ReadTimeout, ConnectionError, RequestException) as e:
        print(e)
        return False

    soup = BeautifulSoup(resp.text, 'html.parser')

    title = ''
    note = ''

    # soup.title.string is None for an empty <title> or one with nested
    # markup, so guard before handing it to _filter_emoji.
    if soup.title and soup.title.string:
        title = _filter_emoji(soup.title.string)

    keywords = soup.find(attrs={"name":"keywords"})
    if keywords is not None:
        # .get() avoids a KeyError when the tag has no content attribute.
        content = keywords.get('content')
        if content:
            note = _filter_emoji(content)

    if title and note:
        return [title, note]
    return False