Skip to content
This repository was archived by the owner on Apr 1, 2021. It is now read-only.

Commit 842545b

Browse files
authored
Merge pull request #81 from macs1207/master
Fix announcement parser
2 parents 3684c2f + c50f6df commit 842545b

1 file changed

Lines changed: 4 additions & 3 deletions

File tree

src/crawler/school_announcements_crawler.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
from lxml import etree
22
import requests
3+
import re
34
from utils import error_code
45
from utils import config
56

@@ -30,9 +31,9 @@ def acad(page=0):
3031

3132
if req.status_code == 200:
3233
req = req.json()['content']
33-
3434
root = etree.HTML(req)
35-
date = root.xpath('//*[@class="mdate before"]')
35+
node = root.xpath('//*[@class="d-txt"]')
36+
date = [node[i] for i in range(0, len(node), 3)]
3637
href = root.xpath('//*[@class="d-txt"]//a')
3738

3839
base_id = page*15
@@ -42,7 +43,7 @@ def acad(page=0):
4243
'info':{
4344
'id': base_id+index,
4445
'title': href_data.attrib['title'],
45-
'date':date_time.text
46+
'date': re.search("([12]\d{3}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01]))", date_time.text).group()
4647
}
4748
} for index, (date_time, href_data) in enumerate(zip(date, href))]
4849
return notification

0 commit comments

Comments (0)