class documentation

class LxmlParserLinkExtractor:

View In Hierarchy

Undocumented

Method __init__ Undocumented
Method _deduplicate_if_needed Undocumented
Method _extract_links Undocumented
Method _iter_links Undocumented
Method _process_links Normalize and filter extracted links
Method extract_links Undocumented
Instance Variable link_key Undocumented
Instance Variable process_attr Undocumented
Instance Variable scan_attr Undocumented
Instance Variable scan_tag Undocumented
Instance Variable strip Undocumented
Instance Variable unique Undocumented
def __init__(self, tag='a', attr='href', process=None, unique=False, strip=True, canonicalized=False):

Undocumented

def _deduplicate_if_needed(self, links):

Undocumented

def _extract_links(self, selector, response_url, response_encoding, base_url):

Undocumented

def _iter_links(self, document):

Undocumented

def _process_links(self, links):

Normalize and filter extracted links

The subclass should override it if necessary

def extract_links(self, response):

Undocumented

link_key =

Undocumented

process_attr =

Undocumented

scan_attr =

Undocumented

scan_tag =

Undocumented

strip =

Undocumented

unique =

Undocumented