"""Demonstration of querying Sketch Engine through its HTTP interface.

The script runs three steps in order, all at module level:

1. Log on to the site with a ``requests.Session`` (``s``).  Both demos
   below reuse that session, so the logon has to happen first.
2. Demo 1: send each query through the ``json=`` input interface and
   print the ``concsize`` value of the JSON reply.
3. Demo 2: send the same queries as ordinary URL parameters, print the
   ``concsize`` value and one sample entry of the reply's ``Lines`` list.

Fill in ``username`` and ``password`` before running.
"""
import json
import urllib.parse

import requests

debug = False

root_url = 'https://the.sketchengine.co.uk'
base_url = '%s/bonito/run.cgi/' % root_url

# --- Logon and authentication ------------------------------------------
# You'll need to do this first: both demos below need the
# requests.Session object "s" that is set up here.
username = ''
password = ''

login_url = '%s/login/' % root_url
logindata = {'username': username, 'password': password, 'submit': 'ok'}

s = requests.Session()
s.auth = (username, password)
# Visit the login page once before posting the form (presumably so the
# session picks up any initial cookies -- confirm against the site).
s.get(login_url)
r = s.post(login_url, data=logindata)

# You want this to be NOT an empty list!
print("Cookies:" + str(s.cookies.keys()))

if debug:
    print("request headers:" + str(r.request.headers) + "\n")
    print("request text:" + str(r.request) + "\n")
    print("response headers:" + str(r.headers) + "\n")
    print("response code:" + str(r.status_code) + "\n")

# --- Demo 1: Sketch Engine through the json input interface ------------
method = 'view'
corp = 'bnc2'

# Query attributes; 'q' is filled in per query inside the loop.
attrs = dict(corpname=corp, q='', pagesize='1', format='json')

# query_list can be read from a file, ...
query_list = [
    '[lemma="test"]',
    '[lemma="drug"][lemma="test"]',
    '[lemma="blood"][lemma="test"]',
    '[lemma="test"][lemma="result"]',
]

for query in query_list:
    # The query string sent to the server is the letter 'q' followed by
    # the query expression.
    attrs['q'] = 'q' + query
    # The whole attribute dict is serialised to JSON, percent-encoded and
    # passed as the single "json" URL parameter.
    encoded_attrs = urllib.parse.quote(json.dumps(attrs))
    url = base_url + method + '?json=%s' % encoded_attrs
    # "s" is the requests.Session object created and set up earlier.
    r = s.get(url)
    json_obj = r.json()
    print(query + '\t' + str(json_obj.get('concsize', '0')))

# --- Demo 2: NOT using the json input interface ------------------------
corp = 'bnc2'
method = 'view'
attrs = dict(corpname=corp, q='', pagesize='200', format='json')
query_list = [
    '[lemma="test"]',
    '[lemma="drug"][lemma="test"]',
    '[lemma="blood"][lemma="test"]',
    '[lemma="test"][lemma="result"]',
]

# The endpoint does not depend on the query, so build it once.
url = base_url + method

for query in query_list:
    attrs['q'] = 'q' + query
    # The requests module builds the URL parameters itself: we just give
    # it the dictionary (attrs).  "s" is the session set up earlier.
    r = s.get(url, params=attrs)
    if debug:
        print(r.request.url)
        print(urllib.parse.unquote(r.request.url))
        print(r.status_code)
        print(r.headers)

    # The requests module also handles the json output nicely. ;)
    json_obj = r.json()
    print(query + '\t' + str(json_obj.get('concsize', '0')))

    # Print one sample concordance entry: index 5, i.e. the sixth entry
    # of "Lines" (the list is zero-based).  A reply may carry fewer
    # entries or no "Lines" key at all, so check before indexing.
    lines = json_obj.get("Lines", [])
    if len(lines) > 5:
        print(json.dumps(lines[5], sort_keys=True, indent=4))
    else:
        print("fewer than 6 concordance lines returned for " + query)