commit 33418e9969d8ab13994759d0957dfc73386c1fa3
parent 6896ad8e12f2fbf6e522fb7c1936124224426325
Author: Ryan Culpepper <ryanc@racket-lang.org>
Date: Mon, 6 Sep 2010 23:00:47 -0600
added tool for detecting useless requires
original commit: 221d07626ef1f90b1be620f141c318884d141871
Diffstat:
1 file changed, 518 insertions(+), 0 deletions(-)
diff --git a/collects/macro-debugger/analysis/check-requires.rkt b/collects/macro-debugger/analysis/check-requires.rkt
@@ -0,0 +1,518 @@
+#lang racket/base
+(require racket/contract
+ racket/match
+ racket/dict
+ unstable/struct
+ syntax/modresolve
+ syntax/stx
+ syntax/id-table
+ macro-debugger/model/deriv
+ "util.rkt")
+(provide/contract
+ [check-requires (-> module-path? list?)]
+ [add-disappeared-uses? (parameter/c boolean?)]
+ [mpi->key (-> module-path-index? any/c)])
+
+#|
+
+The purpose of this script is to estimate a module's useless requires.
+
+Usage:
+
+ (check-requires <module-name>)
+
+Examples:
+
+ (check-requires 'typed-scheme)
+ (check-requires 'unstable/markparam)
+ (check-requires 'macro-debugger/syntax-browser/widget)
+
+The procedure prints one line per (non-label) require in the following
+format:
+
+ KEEP <module> at <phase> <optional-comment>
+ - The require must be kept because bindings defined within it are used.
+ - The optional comment indicates if the require must be kept
+ - only because its bindings are re-exported
+ - only because the whitelist DB says so
+
+ BYPASS <module> at <phase>
+ - The require is used, but only for bindings that could be more directly
+ obtained via another module. For example, 'racket' can be bypassed in favor
+ of some subset of 'racket/base', 'racket/contract', etc.
+
+ DROP <module> at <phase>
+ - The require appears to be unused. Unless it must be kept for side
+ effects or for bindings of a very unusual macro, it can be dropped
+ entirely.
+
+Notes:
+
+ BYPASS recommendations should often be disregarded, because the
+ required module is expressly intended as an aggregation module and the
+ only way to bypass it would be to require private modules
+ directly. See TODO for plans to improve BYPASS recommendations.
+
+ Ignore recommendations to DROP or BYPASS modules with side
+ effects. Read the section below (How it works) and also see the docs
+ for 'module-db' for whitelisting side-effectful modules.
+
+ The script is not intelligent about the language, which causes
+ certain spurious recommendations to appear frequently. For example,
+
+ DROP scheme/mzscheme at 1
+
+ means that the module's language is mzscheme, which automatically
+ inserts (require-for-syntax scheme/mzscheme). It's infeasible to
+ remove it except by rewriting the module in scheme/base or
+ racket/base.
+
+========
+
+How it works
+
+Determining whether a require is actually useless is impossible: a
+module may be required for compile-time side effect only, and there's
+no way to monitor that.
+
+Here are some approximations that are feasible to calculate:
+
+NOM-USES = A require R is "used" by a module M if, during the
+compilation of M, a reference is resolved to a binding exported by R.
+
+DEF-USES = A require R is "used" by a module M if, during the
+compilation of M, a reference is resolved to a binding defined in R.
+
+The limitations:
+ - misses side-effects
+ - misses identifiers recognized via 'free-identifier=?'
+ (But those should be recorded as 'disappeared-use anyway.)
+
+|#
+
+;; ========
+
+(define add-disappeared-uses? (make-parameter #t))
+
+;; ========
+
+;; A RefTable = hash[Phase => free-id-table[bool]]
+;; Phase = nat
+
+#|
+
+For the calculations at the end, we only want to consider identifiers
+from the expanded module (ie, syntax-source-module = here.)
+
+That means that instead of a free-id-table, we really want a dict/set
+that distinguishes between identifiers imported different ways. eg,
+hash keyed on (nom-name, nom-mod). The reason is that a not-from-here
+identifier can block/clobber a from-here identifier if they happen to
+refer to the same binding. That messes up the analysis.
+
+Temporary solution: only add from-here identifiers to the reftable.
+
+|#
+
+;; new-reftable : -> RefTable
+(define (new-reftable)
+ (make-hash))
+
+;; reftable-get-phase : RefTable Phase -> free-id-table[bool]
+(define (reftable-get-phase refs phase)
+ (hash-ref! refs phase (lambda () (make-free-id-table #:phase phase))))
+
+;; reftable-add-all! : RefTable Phase (listof identifier) -> void
+(define (reftable-add-all! refs phase ids)
+ (let ([id-table (reftable-get-phase refs phase)])
+ (for ([id (in-list ids)]
+ #:when (here-mpi? (syntax-source-module id)))
+ (free-id-table-set! id-table id #t))))
+
+;; reftable-add! : RefTable Phase identifier -> void
+(define (reftable-add! refs phase id)
+ (reftable-add-all! refs phase (list id)))
+
+;; ========
+
+;; phase : (parameterof nat)
+(define phase (make-parameter 0))
+
+;; ========
+
+;; analyze : *Deriv* RefTable -> void
+;; *Deriv* = Deriv | LDeriv | BRule | ModRule | ... (anything from deriv.rkt)
+(define (analyze deriv refs)
+ (define (recur . args)
+ (let check ([arg args])
+ (cond [(syntax? arg) (error 'whoops "arg = ~s" arg)]
+ [(list? arg) (for-each check arg)]
+ [else (void)]))
+ (for ([arg (in-list args)])
+ (if (list? arg)
+ (apply recur arg)
+ (analyze arg refs))))
+ (define (add! ids)
+ (reftable-add-all! refs (phase) ids))
+
+ ;; (printf "analyze ~.s\n" deriv)
+
+ ;; Handle common base (ie, resolves) part of derivs, if applicable
+ (match deriv
+ [(base z1 z2 resolves ?1)
+ (add! resolves)
+ (when (and (syntax? z2) (add-disappeared-uses?))
+ (let ([uses (syntax-property z2 'disappeared-use)])
+ (add! (let loop ([x uses] [onto null])
+ (cond [(identifier? x) (cons x onto)]
+ [(pair? x) (loop (car x) (loop (cdr x) onto))]
+ [else onto])))))]
+ [_
+ (void)])
+ ;; Handle individual variants
+ (match deriv
+ [(lift-deriv z1 z2 first lift-stx second)
+ (recur first second)]
+ [(tagrule z1 z2 tagged-stx next)
+ (recur next)]
+ [(lift/let-deriv z1 z2 first lift-stx second)
+ (recur first second)]
+
+ [(mrule z1 z2 rs ?1 me1 locals me2 ?2 etx next)
+ (recur locals next)]
+ [(local-exn exn)
+ (void)]
+ [(local-expansion z1 z2 for-stx? me1 inner lifted me2 opaque)
+ (parameterize ((phase (+ (phase) (if for-stx? 1 0))))
+ (recur inner))]
+ [(local-lift expr ids)
+ (void)]
+ [(local-lift-end decl)
+ (void)]
+ [(local-lift-require req expr mexpr)
+ (void)]
+ [(local-lift-provide prov)
+ (void)]
+ [(local-bind names ?1 renames bindrhs)
+ (recur bindrhs)]
+ [(local-value name ?1 resolves bound?)
+ (when (and bound? resolves)
+ (add! (cons name resolves)))]
+ [(track-origin before after)
+ (void)]
+ [(local-remark contents)
+ (void)]
+
+ [(p:variable z1 z2 rs ?1)
+ (void)]
+ [(p:module z1 z2 rs ?1 locals tag rename check tag2 ?3 body shift)
+ (recur locals check body)]
+ [(p:#%module-begin z1 z2 rs ?1 me pass1 pass2 ?2)
+ (recur pass1 pass2)]
+ [(p:define-syntaxes z1 z2 rs ?1 rhs locals)
+ (parameterize ((phase (+ (phase) 1)))
+ (recur rhs locals))]
+ [(p:define-values z1 z2 rs ?1 rhs)
+ (recur rhs)]
+
+ [(p:#%expression z1 z2 rs ?1 inner untag)
+ (recur inner)]
+ [(p:if z1 z2 rs ?1 test then else)
+ (recur test then else)]
+ [(p:wcm z1 z2 rs ?1 key mark body)
+ (recur key mark body)]
+ [(p:set! _ _ _ _ id-resolves ?2 rhs)
+ (add! id-resolves)
+ (recur rhs)]
+ [(p:set!-macro _ _ _ _ deriv)
+ (recur deriv)]
+ [(p:#%app _ _ _ _ lderiv)
+ (recur lderiv)]
+ [(p:begin _ _ _ _ lderiv)
+ (recur lderiv)]
+ [(p:begin0 _ _ _ _ first lderiv)
+ (recur first lderiv)]
+
+ [(p:lambda _ _ _ _ renames body)
+ (recur body)]
+ [(p:case-lambda _ _ _ _ renames+bodies)
+ (recur renames+bodies)]
+ [(p:let-values _ _ _ _ renames rhss body)
+ (recur rhss body)]
+ [(p:letrec-values _ _ _ _ renames rhss body)
+ (recur rhss body)]
+ [(p:letrec-syntaxes+values _ _ _ _ srenames sbindrhss vrenames vrhss body tag)
+ (recur sbindrhss vrhss body)]
+
+ [(p:provide _ _ _ _ inners ?2)
+ (recur inners)]
+
+ [(p:require _ _ _ _ locals)
+ (recur locals)]
+
+ [(p:#%stratified-body _ _ _ _ bderiv)
+ (recur bderiv)]
+
+ [(p:stop _ _ _ _) (void)]
+ [(p:unknown _ _ _ _) (void)]
+ [(p:#%top _ _ _ _)
+ (void)]
+ [(p:#%datum _ _ _ _) (void)]
+ [(p:quote _ _ _ _) (void)]
+ [(p:quote-syntax z1 z2 _ _)
+ (when z2 (analyze/quote-syntax z2 refs))]
+ [(p:#%variable-reference _ _ _ _)
+ ;; FIXME
+ (void)]
+
+ [(lderiv _ _ ?1 derivs)
+ (recur derivs)]
+
+ [(bderiv _ _ pass1 trans pass2)
+ (recur pass1 pass2)]
+
+ [(b:error ?1)
+ (void)]
+ [(b:expr _ head)
+ (recur head)]
+ [(b:splice _ head ?1 tail ?2)
+ (recur head)]
+ [(b:defvals _ head ?1 rename ?2)
+ (recur head)]
+ [(b:defstx _ head ?1 rename ?2 bindrhs)
+ (recur head bindrhs)]
+
+ [(bind-syntaxes rhs locals)
+ (parameterize ((phase (+ 1 (phase))))
+ (recur rhs locals))]
+
+ [(clc ?1 renames body)
+ (recur body)]
+
+ [(mod:prim head rename prim)
+ (recur head prim)]
+ [(mod:splice head rename ?1 tail)
+ (recur head)]
+ [(mod:lift head renames tail)
+ (recur head)]
+ [(mod:lift-end tail)
+ (void)]
+ [(mod:cons head)
+ (recur head)]
+ [(mod:skip)
+ (void)]
+
+ [(ecte _ _ locals first second locals2)
+ (recur locals first second locals2)]
+
+ [#f
+ (void)]))
+
+;; analyze/quote-syntax : stx RefTable -> void
+;; Current approach: estimate that an identifier in a syntax template
+;; may be used at (sub1 (phase)) or (phase).
+;; FIXME: Allow for more conservative choices, too.
+;; FIXME: #%top, #%app, #%datum, etc?
+;; FIXME: Track tentative (in quote-syntax) references separately?
+(define (analyze/quote-syntax qs-stx refs)
+ (let ([phases (list (phase) (sub1 (phase)))]
+ [stx (syntax-case qs-stx ()
+ [(_quote-syntax x) #'x])])
+ (define (add! id)
+ (for ([phase (in-list phases)])
+ (reftable-add! refs phase id)))
+ (let loop ([stx stx])
+ (let ([d (if (syntax? stx) (syntax-e stx) stx)])
+ (cond [(identifier? stx) (add! stx)]
+ [(pair? d)
+ (loop (car d))
+ (loop (cdr d))]
+ [(vector? d)
+ (map loop (vector->list d))]
+ [(prefab-struct-key d)
+ (map loop (struct->list d))]
+ [(box? d)
+ (loop (unbox d))]
+ [else
+ (void)])))))
+
+;; ========
+
+;; sMPI = S-expr form of mpi (see mpi->key)
+;; Using MPIs doesn't work. I conjecture that the final module shift means that
+;; all during-expansion MPIs are different from all compiled-expr MPIs.
+
+;; A UsedTable = hash[(list int sMPI) => list]
+
+;; calculate-used-approximations : RefTable -> (values UsedTable UsedTable)
+(define (calculate-used-approximations refs)
+ (let ([NOM-USES (make-hash)]
+ [DEF-USES (make-hash)])
+ (for* ([(use-phase id-table) (in-hash refs)]
+ [id (in-dict-keys id-table)])
+ ;; Only look at identifiers written in module being examined.
+ ;; (Otherwise, nom-mod & nom-phase aren't enough info (???)
+ (when (here-mpi? (syntax-source-module id)) ;; REDUNDANT
+ (let ([b (identifier-binding id use-phase)])
+ (match b
+ [(list def-mod def-sym nom-mod nom-sym
+ def-phase nom-imp-phase nom-exp-phase)
+ ;; use-phase = def-phase + required-phase
+ ;; thus required-phase = use-phase - def-phase
+ (let* ([required-phase (- use-phase def-phase)]
+ [key (list required-phase (mpi->key def-mod))])
+ (hash-set! DEF-USES key
+ (cons id (hash-ref DEF-USES key null))))
+ ;; use-phase = nom-imp-phase + nom-exp-phase ?????
+ ;; We just care about nom-imp-phase, since importing into *here*
+ #|
+ ;; FIXME: This check goes wrong on defined-for-syntax ids
+ (unless (equal? use-phase (+ nom-imp-phase nom-exp-phase))
+ (error 'calculate
+ "internal error: phases wrong in ~s @ ~s, binding = ~s"
+ id use-phase b))
+ |#
+ (let ([key (list nom-imp-phase (mpi->key nom-mod))])
+ (hash-set! NOM-USES key
+ (cons id (hash-ref NOM-USES key null))))]
+ [_
+ (void)]))))
+ (values NOM-USES DEF-USES)))
+
+;; ========
+
+;; get-requires : compiled-module-expr -> (listof (list int MPI))
+(define (get-requires compiled)
+ (let ([phase+mods-list (module-compiled-imports compiled)])
+ (for*/list ([phase+mods (in-list phase+mods-list)]
+ #:when (car phase+mods) ;; Skip for-label requires
+ [mod (cdr phase+mods)])
+ (list (car phase+mods) mod))))
+
+;; add-provides! : compiled-module-expr UsedTable UsedTable -> void
+(define (add-provides! compiled NOM-USES DEF-USES)
+ (define (add! mpi phase)
+ (let ([key (list phase (mpi->key mpi))])
+ (hash-set! NOM-USES key (cons 'export (hash-ref NOM-USES key null)))
+ (hash-set! DEF-USES key (cons 'export (hash-ref DEF-USES key null)))))
+ (let-values ([(vprov sprov) (module-compiled-exports compiled)])
+ (for* ([phase+exps (in-list (append vprov sprov))]
+ #:when (car phase+exps) ;; Skip for-label provides
+ [name+srcs (in-list (cdr phase+exps))]
+ [src (in-list (cadr name+srcs))])
+ (let ([name (car name+srcs)])
+ (match src
+ [(? module-path-index?)
+ (add! src 0)]
+ [(list imp-mod imp-phase-shift imp-name imp-phase-???)
+ (add! imp-mod imp-phase-shift)])))))
+
+;; ========
+
+;; A ModuleDB = hash[path/symbol => (U 'no-drop 'no-bypass)]
+;; 'no-drop = must not be dropped or bypassed because of, eg, side effects
+;; 'no-bypass = don't bypass in favor of private component modules
+;; but if the module is unused, can drop it
+;; (FIXME: replace with component module calculation and checking)
+
+(define (make-module-db mod+config-list)
+ (for/hash ([mod+config (in-list mod+config-list)])
+ (values (resolve-module-path (car mod+config) #f) (cadr mod+config))))
+
+;; module-db : ModuleDB
+(define module-db
+ (make-module-db
+ '([racket/base no-bypass]
+ [racket/contract/base no-bypass]
+ [racket/gui no-bypass]
+ [racket/match no-bypass]
+ ['#%builtin no-drop]
+
+ [typed-scheme/private/base-env no-drop]
+ [typed-scheme/private/base-special-env no-drop]
+ [typed-scheme/private/base-env-numeric no-drop]
+ [typed-scheme/private/base-env-indexing no-drop])))
+
+;; ========
+
+#|
+A recommendation is one of
+ (list 'keep module-path-index phase string/#f)
+ (list 'bypass module-path-index phase)
+ (list 'drop module-path-index phase)
+|#
+
+;; check-requires : module-path -> (listof recommendation)
+(define (check-requires mod-path)
+ (let-values ([(compiled deriv) (get-module-code/trace mod-path)])
+ (let ([refs (new-reftable)])
+ (analyze deriv refs)
+ (let-values ([(NOM-USES DEF-USES) (calculate-used-approximations refs)])
+ (add-provides! compiled NOM-USES DEF-USES)
+ (report NOM-USES DEF-USES (get-requires compiled))))))
+
+;; report : UseTable UseTable (listof (list int mpi)) -> (listof recommendation)
+(define (report NOM-USES DEF-USES phase+mod-list)
+ (for/list ([phase+mod (in-list phase+mod-list)])
+ (let* ([key (list (car phase+mod) (mpi->key (cadr phase+mod)))]
+ [db-config
+ (hash-ref module-db
+ (resolved-module-path-name
+ (module-path-index-resolve (cadr phase+mod)))
+ #f)]
+ [nom-ids (hash-ref NOM-USES key null)]
+ [def-ids (hash-ref DEF-USES key null)]
+ [phase (car phase+mod)]
+ [mod (cadr phase+mod)]
+ [name (format "~s at ~s" (mpi->key mod) phase)])
+ (cond [(and (pair? nom-ids) (pair? def-ids))
+ (list 'keep mod phase (if (ormap identifier? nom-ids) #f "for exports"))]
+ [(pair? nom-ids)
+ (if (memq db-config '(no-bypass no-drop))
+ (list 'keep mod phase "db says no-bypass")
+ (list 'bypass mod phase))]
+ [else
+ (if (memq db-config '(no-drop))
+ (list 'keep mod phase "db says no-drop")
+ (list 'drop mod phase))]))))
+
+(define (mpi->key x)
+ (let ([l (mpi->list x)])
+ (if (and (pair? l) (null? (cdr l)))
+ (car l)
+ l)))
+
+(define (mpi->list x)
+ (cond [(module-path-index? x)
+ (let-values ([(rel base) (module-path-index-split x)])
+ (if rel
+ (cons rel (mpi->list base))
+ null))]
+ [(eq? x #f)
+ null]
+ [else
+ (list x)]))
+
+#|
+TODO
+====
+
+Elaborate on BYPASS recommendations by finding the necessary modules
+further up the require chain to require directly.
+ - don't recommend private modules, though... heuristic
+
+Let user provide database of modules that should never be dropped, eg
+because they have side effects.
+ - wouldn't it be awesome if this db could be a datalog program?
+ - start simpler, though
+
+Verbose mode should show identifiers used by a module (for KEEP).
+For example, if only one used, good candidate to split out, if possible.
+
+Ambitious mode could analyze module and recommend ways to split module
+into independent submodules.
+
+More options for quote-syntax handling & explain current heuristic better.
+
+Handle for-label.
+|#