diff --git a/README.md b/README.md index 785496d..7917f37 100644 --- a/README.md +++ b/README.md @@ -10,19 +10,12 @@ Python 3 API for [USPS Informed Delivery](https://my.usps.com/mobileWeb/pages/in Sign up for Informed Delivery and verify your address. -### Chrome - -Install Google Chrome and Chromedriver. These are dependencies for the Selenium webdriver, which is used internally to this module to facilitate the login process. - -Instructions (adapt as necessary for your OS): - - Ubuntu 16: https://gist.github.com/ziadoz/3e8ab7e944d02fe872c3454d17af31a5 - - RHEL 7: https://stackoverflow.com/a/46686621 - -Note that installing Selenium Server is not required. - ## Install -`pip install myusps` +```shell +pip install myusps +playwright install +``` ## Usage @@ -32,7 +25,8 @@ import myusps # Establish a session. # Use the login credentials you use to login to My USPS via the web. # A login failure raises a `USPSError`. -session = myusps.get_session("username", "password") +# Browser options are 'firefox', 'chrome' (Chromium), and 'webkit'. +session = myusps.get_session("username", "password", driver="firefox") # Get your profile information as a dict. Includes name, address, phone, etc. 
profile = myusps.get_profile(session) diff --git a/myusps/__init__.py b/myusps/__init__.py index ccf36b7..5d5ea29 100644 --- a/myusps/__init__.py +++ b/myusps/__init__.py @@ -4,17 +4,12 @@ import logging import os.path import pickle -import re -from bs4 import BeautifulSoup +import lxml.html from dateutil.parser import parse import requests from requests.auth import AuthBase import requests_cache -from selenium import webdriver -from selenium.common.exceptions import TimeoutException, WebDriverException -from selenium.webdriver.support import expected_conditions as EC -from selenium.webdriver.support.ui import WebDriverWait -from selenium.webdriver.firefox.options import Options +from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError _LOGGER = logging.getLogger(__name__) @@ -33,12 +28,6 @@ ATTRIBUTION = 'Information provided by www.usps.com' USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) ' \ 'Chrome/41.0.2228.0 Safari/537.36' -CHROME_WEBDRIVER_ARGS = [ - '--headless', '--user-agent={}'.format(USER_AGENT), '--disable-extensions', - '--disable-gpu', '--no-sandbox' -] -FIREFOXOPTIONS = Options() -FIREFOXOPTIONS.add_argument("--headless") class USPSError(Exception): @@ -62,15 +51,15 @@ def _load_cookies(filename): def _get_primary_status(row): """Get package primary status.""" try: - return row.find('div', {'class': 'pack_h3'}).string - except AttributeError: + return row.xpath(".//div[contains(@class,'pack_h3')]")[0].text.strip() + except (AttributeError, IndexError): return None def _get_secondary_status(row): """Get package secondary status.""" try: - return row.find('div', {'id': 'coltextR3'}).contents[1] + return row.xpath(".//div[@id='coltextR3']/text()")[1].strip() except (AttributeError, IndexError): return None @@ -78,10 +67,10 @@ def _get_secondary_status(row): def _get_shipped_from(row): """Get where package was shipped from.""" try: - spans = row.find('div', {'id': 'coltextR2'}).find_all('span') + spans = 
row.xpath(".//div[@id='coltextR2']/span") - if len(spans) < 2: + if len(spans) < 3: return None - return spans[1].string + return spans[2].text except AttributeError: return None @@ -89,10 +78,10 @@ def _get_shipped_from(row): def _get_status_timestamp(row): """Get latest package timestamp.""" try: - divs = row.find('div', {'id': 'coltextR3'}).find_all('div') + divs = row.xpath(".//div[@id='coltextR3']/div") if len(divs) < 2: return None - timestamp_string = divs[1].string + timestamp_string = divs[1].text except AttributeError: return None try: @@ -104,8 +93,8 @@ def _get_status_timestamp(row): def _get_delivery_date(row): """Get delivery date (estimated or actual).""" try: - month = row.find('div', {'class': 'date-small'}).string - day = row.find('div', {'class': 'date-num-large'}).string - except AttributeError: + month = row.xpath(".//div[contains(@class,'date-small')]")[0].text.strip() + day = row.xpath(".//div[contains(@class,'date-num-large')]")[0].text.strip() + except (AttributeError, IndexError): return None try: @@ -117,15 +106,15 @@ def _get_delivery_date(row): def _get_tracking_number(row): """Get package tracking number.""" try: - return row.find('div', {'class': 'pack_h4'}).string - except AttributeError: + return row.xpath(".//div[@class='pack_h4']")[0].text.strip() + except (AttributeError, IndexError): return None def _get_mailpiece_image(row): """Get mailpiece image url.""" try: - return row.find('img', {'class': 'mailpieceIMG'}).get('src') - except AttributeError: + return row.xpath(".//img[@class='mailpieceIMG']/@src")[0] + except IndexError: return None @@ -141,21 +130,8 @@ def _get_mailpiece_url(image): """Get mailpiece url.""" return '{}{}'.format(INFORMED_DELIVERY_IMAGE_URL, image) -def _get_driver(driver_type): - """Get webdriver.""" - if driver_type == 'phantomjs': - return webdriver.PhantomJS(service_log_path=os.path.devnull) - if driver_type == 'firefox': - return webdriver.Firefox(firefox_options=FIREFOXOPTIONS) - elif driver_type == 'chrome': - chrome_options = webdriver.ChromeOptions() - for arg in CHROME_WEBDRIVER_ARGS: - 
chrome_options.add_argument(arg) - return webdriver.Chrome(chrome_options=chrome_options) - else: - raise USPSError('{} not supported'.format(driver_type)) -def _login(session): +def _login(session, driver, headless): """Login. - Use Selenium webdriver to login. USPS authenticates users + Use a Playwright browser to login. USPS authenticates users @@ -171,23 +147,35 @@ def _login(session): session.remove_expired_responses() except AttributeError: pass - try: - driver = _get_driver(session.auth.driver) - except WebDriverException as exception: - raise USPSError(str(exception)) - driver.get(LOGIN_URL) - username = driver.find_element_by_name('username') - username.send_keys(session.auth.username) - password = driver.find_element_by_name('password') - password.send_keys(session.auth.password) - driver.find_element_by_id('btn-submit').click() - try: - WebDriverWait(driver, LOGIN_TIMEOUT).until(EC.title_is(WELCOME_TITLE)) - except TimeoutException: - raise USPSError('login failed') - for cookie in driver.get_cookies(): - session.cookies.set(name=cookie['name'], value=cookie['value']) - _save_cookies(session.cookies, session.auth.cookie_path) + + with sync_playwright() as p: + if driver == "chrome": + browser = p.chromium.launch(headless=headless) + elif driver == "firefox": + browser = p.firefox.launch(headless=headless) + elif driver == "webkit": + browser = p.webkit.launch(headless=headless) + else: + raise USPSError('{} not supported'.format(driver)) + + context = browser.new_context(user_agent=USER_AGENT) + page = context.new_page() + page.goto(LOGIN_URL) + + page.locator("xpath=//input[@id='username']").type(session.auth.username) + page.locator("xpath=//input[@id='password']").type(session.auth.password) + + page.locator("xpath=//button[@id='btn-submit']").click() + + try: + page.wait_for_function("document.title === '{}'".format(WELCOME_TITLE), timeout=LOGIN_TIMEOUT * 1000) + except PlaywrightTimeoutError: + raise USPSError('login failed') + + for cookie in context.cookies(): + session.cookies.set(name=cookie["name"], value=cookie["value"]) + + 
_save_cookies(session.cookies, session.auth.cookie_path) def _get_dashboard(session, date=None): @@ -223,15 +211,15 @@ def get_profile(session): response = session.get(PROFILE_URL, allow_redirects=False) if response.status_code == 302: raise USPSError('expired session') - parsed = BeautifulSoup(response.text, HTML_PARSER) - profile = parsed.find('div', {'class': 'atg_store_myProfileInfo'}) + parsed = lxml.html.fromstring(response.text) + profile = parsed.xpath("//div[@class='atg_store_myProfileInfo']")[0] data = {} - for row in profile.find_all('tr'): - cells = row.find_all('td') + for row in profile.xpath('.//tr'): + cells = row.xpath('.//td') if len(cells) == 2: - key = ' '.join(cells[0].find_all(text=True)).strip().lower().replace(' ', '_') - value = ' '.join(cells[1].find_all(text=True)).strip() + key = cells[0].text_content().strip().lower().replace(' ', '_') + value = cells[1].text_content().strip() data[key] = value return data @@ -240,9 +228,9 @@ def get_packages(session): """Get package data.""" _LOGGER.info("attempting to get package data") response = _get_dashboard(session) - parsed = BeautifulSoup(response.text, HTML_PARSER) + parsed = lxml.html.fromstring(response.text) packages = [] - for row in parsed.find_all('div', {'class': 'pack_row'}): + for row in parsed.xpath("//div[@class='pack_row']"): packages.append({ 'tracking_number': _get_tracking_number(row), 'primary_status': _get_primary_status(row), @@ -261,9 +249,9 @@ def get_mail(session, date=None): if not date: date = datetime.datetime.now().date() response = _get_dashboard(session, date) - parsed = BeautifulSoup(response.text, HTML_PARSER) + parsed = lxml.html.fromstring(response.text) mail = [] - for row in parsed.find_all('div', {'class': 'mailpiece'}): + for row in parsed.xpath("//div[@class='mailpiece']"): image = _get_mailpiece_image(row) if not image: continue @@ -276,17 +264,16 @@ def get_mail(session, date=None): # pylint: disable=too-many-arguments def get_session(username, password, cookie_path=COOKIE_PATH, cache=True, - 
cache_expiry=300, cache_path=CACHE_PATH, driver='phantomjs'): + cache_expiry=300, cache_path=CACHE_PATH, driver='chrome', headless=False): """Get session, existing or new.""" class USPSAuth(AuthBase): # pylint: disable=too-few-public-methods """USPS authorization storage.""" - def __init__(self, username, password, cookie_path, driver): + def __init__(self, username, password, cookie_path): """Init.""" self.username = username self.password = password self.cookie_path = cookie_path - self.driver = driver def __call__(self, r): """Call is no-op.""" @@ -294,13 +281,12 @@ def __call__(self, r): session = requests.Session() if cache: - session = requests_cache.core.CachedSession(cache_name=cache_path, - expire_after=cache_expiry) - session.auth = USPSAuth(username, password, cookie_path, driver) + session = requests_cache.CachedSession(cache_name=cache_path, expire_after=cache_expiry) + session.auth = USPSAuth(username, password, cookie_path) session.headers.update({'User-Agent': USER_AGENT}) if os.path.exists(cookie_path): _LOGGER.debug("cookie found at: %s", cookie_path) session.cookies = _load_cookies(cookie_path) else: - _login(session) - return session \ No newline at end of file + _login(session, driver, headless) + return session diff --git a/setup.py b/setup.py index e1fb031..1d30fe2 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,8 @@ author='happyleaves', author_email='happyleaves.tfr@gmail.com', packages=find_packages(), - install_requires=['beautifulsoup4==4.6.0', 'python-dateutil==2.6.0', 'requests>=2.20.0', 'requests-cache==0.4.13', 'selenium==3.11.0'], + install_requires=['lxml==5.2.2', 'python-dateutil==2.9.0.post0', 'requests>=2.32.3', 'requests-cache==1.2.1', + 'playwright==1.44.0'], classifiers=[ 'License :: OSI Approved :: MIT License', 'Operating System :: OS Independent',