Request HTML Celery, RabbitMQ, and Redis

  To run, you need to do two things (while in the folder of python filename.py): 1)  Run the worker by executing the python program with the "worker" argument: $ celery -A tasks worker --loglevel=info 2)  Call the task (i.e. run it): $ python filename.py ####################### # grabhtml.py import requests from html import unescape class GrabHTML(object): def __init__(self): pass…

SQL Server Create New Table And Insert Data using Python

Python Script to insert CSV File into SQL Server Database import pandas as pd import csv import pyodbc import sys, os USERNAME = ‘sa’ PASSWORD = ‘password’ SERVER = ‘server’ DATABASE = ‘DATA’ DRIVERNAME = ‘ODBC Driver 13 for SQL Server’ cnxn = pyodbc.connect(‘Driver={‘+DRIVERNAME+’};Server=’+SERVER+’;Database=’+DATABASE+’;uid=’+USERNAME+’;pwd=’+PASSWORD) cur = cnxn.cursor() ##### using pandas grab first row for column…

Click Through JavaScript Calendar and Download Excel Files

"""Click through the Zacks earnings-report JavaScript calendar and download
each day's Excel export with Selenium + Firefox.

Original post note: "While this is by no means perfect, it got the job
done."  To reuse, change the website URL, the calendar element ids, and
start_date.  (Original shebang tag was ``#! py27``; reconstructed for
Python 3 print syntax — the scrape had collapsed the file to one line.)
"""
import os
import time
from datetime import datetime, date, timedelta

from selenium import webdriver
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Firefox profile: auto-save Excel downloads with no save-as dialog.
fp = webdriver.FirefoxProfile()
fp.set_preference('browser.download.folderList', 2)  # 2 = custom directory
fp.set_preference('browser.download.manager.showWhenStarting', False)
fp.set_preference('browser.download.dir', os.getcwd())
fp.set_preference('browser.helperApps.neverAsk.saveToDisk',
                  'application/vnd.ms-excel')
# BUG FIX: the original wrote "c:\tmp", in which \t is a literal TAB
# character; a raw string keeps the backslash.
fp.set_preference('browser.download.dir', r'c:\tmp')

driver = webdriver.Firefox(firefox_profile=fp)
driver.get('https://www.zacks.com/earnings/earnings-reports')


def click_calendar():
    """Open the calendar widget on the earnings page."""
    try:
        element_xpath = '//*[@id="earnings_release"]/div[1]/p/a'
        WebDriverWait(driver, 10).until(
            lambda driver: driver.find_element_by_xpath(element_xpath).click()
        )
    finally:
        print('clicked calendar')


def click_prev_day(x):
    """Click the calendar day cell with id ``datespan_<x>``.

    Returns True on success, False if the element never became clickable
    within 10 s (original used a bare ``except:``; narrowed to Exception).
    """
    day_id = 'datespan_%d' % x
    try:
        WebDriverWait(driver, 10).until(
            lambda driver: driver.find_element_by_id(day_id).click()
        )
    except Exception:
        return False
    return True


def click_export():
    """Click the 'export to Excel' link; True on success, False on timeout."""
    try:
        WebDriverWait(driver, 10).until(
            lambda driver: driver.find_element_by_id('export_excel').click()
        )
    except Exception:
        return False
    return True


def click_prev_month():
    """Go to the previous calendar month, then select its last real day.

    Months end on day 28-31, so probe 31 downward.  BUG FIX: the original
    wrapped click_prev_day in try/except, but click_prev_day returns a
    bool and never raises, so the retry loop was dead code — it now tests
    the return value instead.
    """
    try:
        driver.find_element_by_id('prevCal').click()
    except NoSuchElementException:
        pass  # the original swallowed this too (bare except)
    i = 31
    while i > 27:
        if click_prev_day(i):
            return False
        print('could not find %s in prev month' % i)
        i -= 1


def subtract_day(n):
    """Return n - 1.  (The scrape had turned '-' into an en dash.)"""
    return n - 1


def start_date():
    return datetime(2016, 2, 29)


def click_to_start_date():
    """Walk the calendar back to the hard-coded start date.

    NOTE(review): the month arithmetic ignores the year, so this only
    works within a single calendar year — confirm before reuse.
    """
    target = datetime(2016, 2, 28)  # original local shadowed start_date()
    months_back = date.today().month - target.month
    if months_back > 0:
        click_calendar()
        while months_back > 0:
            click_prev_month()
            months_back -= 1
        # BUG FIX: same dead try/except as click_prev_month — use the
        # boolean result to fall back to day 30.
        if not click_prev_day(31):
            click_prev_day(30)


def main():
    """Export every day for the last 12 months, newest day first."""
    # click_to_start_date()
    # sdate = start_date()
    for _ in range(12):  # original: m = 12; while m > 0: m -= 1
        for x in range(31, 0, -1):
            click_calendar()
            click_prev_day(x)
            click_export()
        click_calendar()
        click_prev_month()


if __name__ == '__main__':
    main()

# Original post trailed off here: "Few areas where need to improve:
# click_prev_month() - had little difficulty…"

Remove characters left of first Space

#! py35
# Open a text file, remove everything left of the first space on each line,
# and append the result to an output CSV.
# NOTE(review): the blog scrape dropped the backslashes from these Windows
# paths ("C:projectstxt.txt"); reconstructed as C:\projects\... — confirm.

def remove_before_first_space(line):
    """Return *line* with everything up to and including the first space removed.

    BUG FIX: the original indexed ``line.split(' ', 1)[1]`` directly, which
    raised IndexError on any line containing no space; such lines are now
    returned unchanged.
    """
    _, sep, tail = line.partition(' ')
    return tail if sep else line


def main(src=r'C:\projects\txt.txt', dst=r'C:\projects\text.csv'):
    """Stream *src* line by line, writing the trimmed lines to *dst*.

    The output is opened once in append mode ('a', preserved from the
    original, which wastefully re-opened it for every single line).
    """
    with open(src, 'r', encoding='utf-8') as fin, \
         open(dst, 'a', encoding='utf-8') as fout:
        for line in fin:  # stream; no readlines() copy of the whole file
            fout.write(remove_before_first_space(line))


if __name__ == '__main__':
    main()

Regex Find URLs

import re

# NOTE(review): the original snippet was John Gruber's "liberal, accurate
# regex pattern for matching URLs".  The blog scrape stripped every
# backslash from it (\b -> b, \s -> s, \[ -> [), used Python 2's ``ur''``
# prefix (a SyntaxError on Python 3), and — the fatal bug — called
# ``re.sub(p, subst, test_str)`` BEFORE p, subst, and test_str were
# defined, which raises NameError.  Reconstructed below in dependency
# order with a compact equivalent core pattern; group 1 captures the URL.
p = re.compile(r"""(?i)\b((?:https?://|www\d{0,3}[.])[^\s()<>'"]+)""")

test_str = u""   # text to scan (empty placeholder, as in the post)
subst = u""      # replacement: delete matched URLs

# Compile first, substitute second.
result = re.sub(p, subst, test_str)

# The post's trailing remark: "Even longer Regex for finding URL" —
# Gruber's full pattern additionally matches bare domains (example.com)
# against an explicit TLD list.

Create and Import CSV file into Database

# LinkedIn's Industry Codes — create a SQLite table and import the CSV.
import csv
import sqlite3


def load_linkedin_industries(db_path, csv_path):
    """Create the linkedin_industries table, load *csv_path* into it, and
    return every stored row.

    Fixes over the original one-liner session:
      * the original ran ``select * from linkedin_industries`` BEFORE
        executing the CREATE TABLE, which raises OperationalError;
      * plain ``create table`` crashes on a second run — now IF NOT EXISTS;
      * ``groups`` is a reserved word in SQLite >= 3.25 (window functions)
        and is quoted;
      * ``file`` no longer shadows the builtin;
      * the connection is always closed (try/finally) and committed before
        reading back.
    """
    conn = sqlite3.connect(db_path)
    try:
        curs = conn.cursor()
        curs.execute(
            'create table if not exists linkedin_industries '
            '(code int(3), "groups" char(60), description char(60))'
        )
        with open(csv_path, 'r', encoding='utf-8', newline='') as f:
            for row in csv.reader(f, delimiter=','):
                curs.execute(
                    'INSERT INTO linkedin_industries '
                    '(code, "groups", description) VALUES (?, ?, ?)',
                    row,
                )
        conn.commit()
        curs.execute('select * from linkedin_industries')
        return curs.fetchall()
    finally:
        conn.close()


def main():
    # NOTE(review): the scrape dropped the backslashes from this Windows
    # path ("C:projectsLinkedinIndustry Codes.csv"); reconstructed — confirm.
    rows = load_linkedin_industries(
        'industry_classifications',
        r'C:\projects\Linkedin\Industry Codes.csv',
    )
    for rec in rows:
        print(rec)


if __name__ == '__main__':
    main()