Having some problems here, and I apologize if this is simple. I'm getting the below error when trying to write a Scrapy pipeline to sqlite3.
Reading the error, it seems like something is wrong with 'Title TEXT,' if i'm understanding this error correctly, as the 'binding parameter 4' is 'item.get('Title'),'
The 'Title' is just pulling in tag elements from websites - and it's just extracting the text w/o any HTML around it.
line 48, in process_item
self.c.execute('''
sqlite3.InterfaceError: Error binding parameter 4 - probably unsupported type.
Below is the complete pipelines.py file that's associated with the above error.
Appreciate the help!
import sqlite3
class SQLlitePipeline(object):
collection_name = "testTable2"
def open_spider(self, spider):
self.connection = sqlite3.connect("testCrawl6.db")
self.c = self.connection.cursor()
self.c.execute('''
CREATE TABLE testTable4 (
Address TEXT,
Address_len INTEGER,
Content_Type TEXT,
Status_code INTEGER,
Title TEXT,
Title_len INTEGER,
Long_title TEXT,
Title_missing TEXT,
Meta_description TEXT,
Meta_description_count INTEGER,
Meta_keywords TEXT,
H1 TEXT,
H1_len INTEGER,
H1_missing TEXT,
H2 TEXT,
Robot TEXT,
Download_time TEXT,
Link_Anchor TEXT,
Link_href TEXT,
User_Agent TEXT
)
''')
self.connection.commit()
def close_spider(self, spider):
self.connection.close()
def process_item(self, item, spider):
self.c.execute('''
INSERT INTO testTable4 (Address,Address_len,Content_Type,Status_code,Title,Title_len,Long_title,Title_missing,Meta_description,Meta_description_count,Meta_keywords,H1,H1_len,H1_missing,H2,Robot,Download_time,Link_Anchor,Link_href,User_Agent) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
''', (
item.get('Address'),
item.get('Address_len'),
item.get('Content_Type'),
item.get('Status_code'),
item.get('Title'),
item.get('Title_len'),
item.get('Long_title'),
item.get('Title_missing'),
item.get('Meta_description'),
item.get('Meta_description_count'),
item.get('Meta_keywords'),
item.get('H1'),
item.get('H1_len'),
item.get('H1_missing'),
item.get('H2'),
item.get('Robot'),
item.get('Download_time'),
item.get('Link_Anchor'),
item.get('Link_href'),
item.get('User_Agent')
))
self.connection.commit()
return item