(defvar w3-allow-searching-of
  '("text/plain" "text/html" "text/x-setext"
    "application/x-troff-man" "application/x-troff-me"
    "application/x-troff-ms" "application/rtf" "text/richtext"
    "application/x-wais-source" "application/tex"
    "application/texinfo" "application/x-troff")
  "*A list of MIME content types that it is Ok for the automatic search to descend to.")

(defun w3-do-search (term &optional base hops-limit restriction)
  "Recursively descend all the child links of the current document for TERM.
TERM may be a string, in which case it is treated as a regular
expression and `re-search-forward' is used, or a function, in which
case it is funcalled with 1 argument, the current URL being searched.

BASE is the URL to start searching from.  It defaults to the value
of (w3-view-url t).

HOPS-LIMIT is the maximum number of hops to descend from BASE before
the search dies out.

RESTRICTION is a regular expression or function to call with one
argument, a URL that could be searched.  If RESTRICTION returns
non-nil, then the URL is added to the queue, otherwise it is
discarded.  This is useful for restricting searching to either
certain types of URLs (only search ftp links), or restricting
searching to one domain (only search stuff in the indiana.edu domain).

For use in functions passed to w3-do-search:
QUEUE   is the queue of links to be searched
HOPS    is the current number of hops from the root document
RESULTS is an assoc list of (URL . RETVAL), where RETVAL is the value
        returned from previous calls to the TERM function (or point
        if searching for a regexp)."
  (or base (setq base (w3-view-url t)))
  ;; Reuse a buffer already visiting BASE when there is one; otherwise
  ;; fetch the document so the search starts from it.
  (let ((existing (w3-buffer-visiting base)))
    (if existing
        (set-buffer existing)
      (w3-fetch base)))
  (w3-search-internal term hops-limit restriction))

(defun w3-normalize-url (url)
  "Normalize a URL, removing all '#' references from it, etc.
Return nil when URL is nil.  Everything from the first `#' to the end
of URL (the fragment) is dropped; a URL without `#' is returned as is."
  (cond
   ((null url) nil)
   ;; Keep the part BEFORE the `#'.  The fragment only names a position
   ;; inside the document, so two URLs differing only in it are the
   ;; same page as far as the search queue is concerned.
   ((string-match "#" url) (substring url 0 (match-beginning 0)))
   (t url)))

(defun w3-search-url-allowed-p (url restriction)
  "Return non-nil if URL passes RESTRICTION.
A nil URL is never allowed.  A nil RESTRICTION allows every URL.  A
string RESTRICTION is matched against URL as a regular expression.  A
symbol or function RESTRICTION is called with URL as its argument.
Any other RESTRICTION allows every URL."
  (cond
   ((null url) nil)
   ;; nil must be tested before `symbolp': nil is a symbol, and
   ;; funcalling it would signal `void-function'.
   ((null restriction) t)
   ((stringp restriction) (string-match restriction url))
   ((or (symbolp restriction) (functionp restriction))
    (funcall restriction url))
   (t t)))

(defun w3-search-apply-term (term url)
  "Apply the search TERM to the current buffer, which holds URL.
Point is first moved to the beginning of the buffer.  If TERM is a
string, return the value of `re-search-forward' for it (point after
the match, or nil).  If TERM is a symbol or function, return the value
of calling it with URL.  Signal an error for any other TERM."
  (goto-char (point-min))
  (cond
   ((stringp term) (re-search-forward term nil t))
   ((or (symbolp term) (functionp term)) (funcall term url))
   (t (error "TERM must be a regular expression or symbol."))))

(defun w3-search-internal (term &optional hops-limit restriction)
  "Recursively descend all the child links of the current document for TERM.
TERM may be a string, in which case it is treated as a regular
expression and `re-search-forward' is used, or a function, in which
case it is funcalled with 1 argument, the current URL being searched.

HOPS-LIMIT is the maximum number of hops to descend from the current
document before the search dies out.  It defaults to 5.  The current
document is 0 hops away, so a HOPS-LIMIT of 0 searches only it.

RESTRICTION is a regular expression or function to call with one
argument, a URL that could be searched.  If RESTRICTION returns
non-nil, then the URL is added to the queue, otherwise it is
discarded.  A nil RESTRICTION accepts every URL.

Return RESULTS, most recently searched document first.

For use in functions passed to w3-do-search (these are the local
variables of this function; seeing them from TERM relies on this file
being evaluated with dynamic binding):
QUEUE   is the queue of links to be searched
HOPS    is the current number of hops from the root document
RESULTS is an assoc list of (URL . RETVAL), where RETVAL is the value
        returned from previous calls to the TERM function (or point
        if searching for a regexp)."
  (setq hops-limit (or hops-limit 5))
  (let* ((root (w3-view-url t))
         (queue '())
         ;; The root is searched right below, so never queue it again.
         (visited (list (w3-normalize-url root)))
         ;; Alist of (URL . HOPS-FROM-ROOT) for every URL ever queued.
         ;; Kept apart from QUEUE so QUEUE stays a plain list of URLs.
         (depths '())
         (results nil)
         (hops 0)
         ;; Called by `w3-map-links' for each link of the current page.
         ;; Element 2 of LINK-DATA is used as the link's URL.  The
         ;; second argument is not used.
         (enqueue-link
          (function
           (lambda (link-data _arg)
             (let ((url (w3-normalize-url (nth 2 link-data)))
                   info)
               (cond
                ;; Already searched, or already waiting in the queue.
                ((and url (or (w3-member url visited)
                              (w3-member url queue)))
                 nil)
                ;; Apply RESTRICTION first so `w3-file-attributes' is
                ;; only asked about URLs that could be searched at all.
                ((null (setq info (and (w3-search-url-allowed-p
                                        url restriction)
                                       (w3-file-attributes url))))
                 (message "Skipping %s (not searchable)" url)
                 nil)
                ;; Element 8 of the attributes is compared against the
                ;; allowed MIME types (presumably the content type --
                ;; confirm against `w3-file-attributes').
                ((w3-member (nth 8 info) w3-allow-searching-of)
                 (setq depths (cons (cons url (1+ hops)) depths)
                       queue (nconc queue (list url))))
                (t
                 (message "Skipping %s (why?)" url))))))))
    ;; Search the initial page and start RESULTS as a proper alist.
    (setq results (list (cons root (w3-search-apply-term term root))))
    ;; Build the initial queue from the links on this page.
    (if (< hops hops-limit)
        (w3-map-links enqueue-link))
    (while queue
      (let ((x (car queue))
            y)
        (setq visited (cons x visited)
              hops (or (cdr (assoc x depths)) 0))
        (if (setq y (w3-buffer-visiting x))
            (set-buffer y)
          (w3-retrieve x))
        ;; Only HTML documents are prepared and scanned for more links;
        ;; every other allowed type is just searched.
        (if (equal (or w3-current-mime-type
                       (w3-extension-to-mime
                        (w3-file-extension w3-current-file)))
                   "text/html")
            (progn
              (w3-prepare-buffer t)
              ;; Children of X are one hop further out than X; stop
              ;; descending once X itself sits at the limit.
              (if (< hops hops-limit)
                  (w3-map-links enqueue-link))))
        ;; Record the outcome under the URL of the page actually searched.
        (setq results (cons (cons x (w3-search-apply-term term x))
                            results)))
      ;; X stays at the head of QUEUE while it is processed, so the
      ;; `nconc' above always appends to a non-empty list.
      (setq queue (cdr queue)))
    results))