<!DOCTYPE html>
<!-- The doctype has to be the first thing in the file: stray text in front of it
     switches browsers to quirks mode and is rendered at the top of the page. -->
<html lang="en">

<head>
    <!-- Google tag (gtag.js): the first script fetches the tag library asynchronously;
         the inline script below queues the initial commands for it. -->
    <script async src="https://www.googletagmanager.com/gtag/js?id=G-XB3PR2Y1TQ"></script>
    <script>
    // Reuse the global dataLayer array if one already exists, otherwise start an empty one.
    window.dataLayer = window.dataLayer || [];
    // gtag() pushes its `arguments` object, unmodified, onto dataLayer.
    function gtag(){dataLayer.push(arguments);}
    // Queue a 'js' command carrying the current Date.
    gtag('js', new Date());

    // Queue a 'config' command for the same ID that appears in the loader URL above.
    gtag('config', 'G-XB3PR2Y1TQ');
    </script>

    <!-- Document basics: encoding, responsive viewport, title and search-result description. -->
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0, shrink-to-fit=no">
    <title>ZeroForge: Feedforward Text-to-Shape Without 3D Supervision</title>
    <meta name="description" content="We expand the generative capability of feed-forward text-to-3D models beyond their 3D datasets">

    <!-- Stylesheets: Bootstrap first, then the web font, then the site's own rules so they can override. -->
    <!-- NOTE(review): the two site stylesheets use root-relative /assets/... paths while the images in
         the body use /ZeroForge/docs/assets/...; confirm both resolve on the deployed site. -->
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/4.5.0/css/bootstrap.min.css">
    <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Source+Sans+Pro:300,400,500,600">
    <link rel="stylesheet" href="/assets/css/Highlight-Clean.css">
    <link rel="stylesheet" href="/assets/css/styles.css">

    <!-- Favicons and web app manifest. -->
    <link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
    <link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
    <link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">
    <link rel="manifest" href="/site.webmanifest">

    <!-- Open Graph metadata used by link-preview crawlers. -->
    <meta property="og:site_name" content="ZeroForge: Feedforward Text-to-Shape Without 3D Supervision">
    <!-- The page is a plain project page with no embedded video, so it is declared as "website". -->
    <meta property="og:type" content="website">
    <meta property="og:title" content="ZeroForge: Feedforward Text-to-Shape Without 3D Supervision">
    <meta property="og:description" content="We expand the generative capability of feed-forward text-to-3D models beyond their 3D datasets">
    <meta property="og:url" content="https://nyu-dice-lab.github.io/ZeroForge/">
    <!-- Preview images must be raster images referenced by absolute URL; a PDF is not rendered
         by preview crawlers. NOTE(review): this URL is built from og:url plus the block-diagram
         path used in the page body; confirm it resolves, or swap in a dedicated preview image. -->
    <meta property="og:image" content="https://nyu-dice-lab.github.io/ZeroForge/docs/assets/block_diagram_zeroconv.png">

    <meta property="article:publisher" content="https://nyu-dice-lab.github.io/ZeroForge/">

    <!-- Twitter card metadata; twitter:url is the page itself, not the preview asset. -->
    <meta name="twitter:card" content="summary_large_image">
    <meta name="twitter:title" content="ZeroForge: Feedforward Text-to-Shape Without 3D Supervision">
    <meta name="twitter:description" content="We expand the generative capability of feed-forward text-to-3D models beyond their 3D datasets">
    <meta name="twitter:url" content="https://nyu-dice-lab.github.io/ZeroForge/">
    <meta name="twitter:image" content="https://nyu-dice-lab.github.io/ZeroForge/docs/assets/block_diagram_zeroconv.png">
    <!-- <meta name="twitter:site" content="" /> -->

    <!-- Page-relative helper script; it has no async/defer, so it is fetched and run before parsing continues. -->
    <!-- NOTE(review): nothing in this file's markup or inline scripts visibly calls into it; confirm it is still needed. -->
    <script src="assets/js/video_comparison.js"></script>
    <!-- Google model-viewer 2.0.1 bundle from unpkg, loaded as an ES module (module scripts are deferred by default). -->
    <!-- NOTE(review): no <model-viewer> element appears in this page; presumably left over from a template, confirm before removing. -->
    <script type="module" src="https://unpkg.com/@google/model-viewer@2.0.1/dist/model-viewer.min.js"></script>
    <style>
        /* Outer wrapper: centres the row of GIFs horizontally on the page. */
        .banner {
            display: flex;
            justify-content: center;
        }

        /* Row of GIF + caption cells laid out side by side. */
        .gif-banner {
            display: flex;
            align-items: center; /* centre the cells vertically */
            /* `gap` does not accept negative lengths, so the previous `-5px` was discarded by
               the browser. 0 states the value that was actually in effect. */
            gap: 0;
        }

        /* Fixed-height GIFs whose width follows their aspect ratio. */
        /* NOTE(review): each img sits inside a wrapper div, so the img itself is not a flex
           item of .gif-banner and flex-shrink has no effect here; kept for parity. */
        .gif-banner img {
            flex-shrink: 0;
            width: auto;
            height: 160px;
        }

        /* Prompt text shown underneath each GIF. */
        .gif-caption {
            text-align: center;
            margin-top: 5px;
        }
    </style>
</head>

<body>
    <!-- Page header: paper title, author grid, and links to the paper and the code. -->
    <div class="highlight-clean" style="padding-bottom: 10px;">
        <div class="container" style="max-width: 768px;">
            <h1 class="text-center"><b>ZeroForge</b>: Feedforward Text-to-Shape Without 3D Supervision</h1>
        </div>
        <div class="container" style="max-width: 768px;">
            <!-- First author row: four equal columns. -->
            <div class="row authors">
                <div class="col-sm-3">
                    <h5 class="text-center"><a class="text-center" href="https://km3888.github.io/">Kelly Marshall</a></h5>
                    <h6 class="text-center">New York University</h6>
                </div>
                <div class="col-sm-3">
                    <h5 class="text-center"><a href="https://www.mnpham.com/">Minh Pham</a></h5>
                    <h6 class="text-center">New York University</h6>
                </div>
                <div class="col-sm-3">
                    <h5 class="text-center"><a href="https://ameya005.github.io/">Ameya Joshi</a></h5>
                    <h6 class="text-center">New York University</h6>
                </div>
                <div class="col-sm-3">
                    <h5 class="text-center"><a class="text-center" href="https://adityabalu.github.io/">Aditya Balu</a></h5>
                    <h6 class="text-center">Iowa State University</h6>
                </div>
            </div>
            <!-- Second author row: three columns, pushed right by one grid unit. -->
            <div class="row authors">
                <div class="col-sm-3 offset-sm-1">
                    <!-- The ampersand in the query string is escaped so the attribute value is valid HTML. -->
                    <h5 class="text-center"><a class="text-center" href="https://scholar.google.com/citations?user=Gb_YM5oAAAAJ&amp;hl=en">Anushrut Jignasu</a></h5>
                    <h6 class="text-center">Iowa State University</h6>
                </div>
                <div class="col-sm-3">
                    <h5 class="text-center"><a class="text-center" href="https://web.me.iastate.edu/idealab/p-krishnamurthy.html">Adarsh Krishnamurthy</a></h5>
                    <h6 class="text-center">Iowa State University</h6>
                </div>
                <div class="col-sm-3">
                    <h5 class="text-center"><a class="text-center" href="https://chinmayhegde.github.io/">Chinmay Hegde</a></h5>
                    <h6 class="text-center">New York University</h6>
                </div>
            </div>
        </div>
        <!-- Call-to-action links. They navigate to other pages, so they keep native link
             semantics (no role="button"); the icons are decorative and hidden from assistive tech. -->
        <div style="display: flex; justify-content: center;">
            <div class="buttons" style="margin-bottom: 8px;">
                <!-- Opens in a new tab; rel="noopener noreferrer" keeps the new page from reaching window.opener. -->
                <a class="btn btn-light" href="https://arxiv.org/abs/2306.08183" target="_blank" rel="noopener noreferrer">
                    <svg style="width:24px;height:24px;margin-left:-12px;margin-right:12px" viewBox="0 0 24 24" aria-hidden="true">
                        <path fill="currentColor" d="M16 0H8C6.9 0 6 .9 6 2V18C6 19.1 6.9 20 8 20H20C21.1 20 22 19.1 22 18V6L16 0M20 18H8V2H15V7H20V18M4 4V22H20V24H4C2.9 24 2 23.1 2 22V4H4M10 10V12H18V10H10M10 14V16H15V14H10Z"></path>
                    </svg>Paper
                </a>
                <a class="btn btn-light" href="https://github.com/Km3888/ZeroForge">
                    <svg style="width:24px;height:24px;margin-left:-12px;margin-right:12px" viewBox="0 0 24 24" aria-hidden="true">
                        <path fill="currentColor" d="M12,2A10,10 0 0,0 2,12C2,16.42 4.87,20.17 8.84,21.5C9.34,21.58 9.5,21.27 9.5,21C9.5,20.77 9.5,20.14 9.5,19.31C6.73,19.91 6.14,17.97 6.14,17.97C5.68,16.81 5.03,16.5 5.03,16.5C4.12,15.88 5.1,15.9 5.1,15.9C6.1,15.97 6.63,16.93 6.63,16.93C7.5,18.45 8.97,18 9.54,17.76C9.63,17.11 9.89,16.67 10.17,16.42C7.95,16.17 5.62,15.31 5.62,11.5C5.62,10.39 6,9.5 6.65,8.79C6.55,8.54 6.2,7.5 6.75,6.15C6.75,6.15 7.59,5.88 9.5,7.17C10.29,6.95 11.15,6.84 12,6.84C12.85,6.84 13.71,6.95 14.5,7.17C16.41,5.88 17.25,6.15 17.25,6.15C17.8,7.5 17.45,8.54 17.35,8.79C18,9.5 18.38,10.39 18.38,11.5C18.38,15.32 16.04,16.16 13.81,16.41C14.17,16.72 14.5,17.33 14.5,18.26C14.5,19.6 14.5,20.68 14.5,21C14.5,21.27 14.66,21.59 15.17,21.5C19.14,20.16 22,16.42 22,12A10,10 0 0,0 12,2Z"></path>
                    </svg>
                    Code
                </a>
            </div>
        </div>
    </div>
    <!-- Banner of example results: one animated GIF per text prompt, with the prompt as caption. -->
    <div class="banner">
        <div class="gif-banner">
            <div>
                <img src="/ZeroForge/docs/assets/videos/gifs/knife.gif" alt="Animated ZeroForge result for the prompt: a knife">
                <div class="gif-caption">"a knife"</div>
            </div>
            <div>
                <img src="/ZeroForge/docs/assets/videos/gifs/spoon.gif" alt="Animated ZeroForge result for the prompt: a spoon">
                <div class="gif-caption">"a spoon"</div>
            </div>
            <div>
                <img src="/ZeroForge/docs/assets/videos/gifs/wineglass.gif" alt="Animated ZeroForge result for the prompt: a wineglass">
                <div class="gif-caption">"a wineglass"</div>
            </div>
            <div>
                <img src="/ZeroForge/docs/assets/videos/gifs/umbrella.gif" alt="Animated ZeroForge result for the prompt: an umbrella">
                <div class="gif-caption">"an umbrella"</div>
            </div>
        </div>
    </div>
    <hr class="divider" />
    <!-- Abstract: a single full-width column holding the paper's abstract. -->
    <div class="container" style="max-width: 768px;">
        <div class="row">
            <div class="col-md-12">
                <h2>Abstract</h2>
                <p>
                    Current state-of-the-art methods for text-to-shape generation either require supervised training using a labeled dataset of pre-defined 3D shapes, or perform expensive inference-time optimization of implicit neural representations.
                    In this work, we present ZeroForge, an approach for zero-shot text-to-shape generation that avoids both pitfalls.
                    To achieve open-vocabulary shape generation, we require careful architectural adaptation of existing feed-forward approaches, as well as a combination of data-free CLIP-loss and contrastive losses to avoid mode collapse.
                    Using these techniques, we are able to considerably expand the generative ability of existing feed-forward text-to-shape models such as CLIP-Forge.
                    We support our method via extensive qualitative and quantitative evaluations.
                </p>
            </div>
        </div>
    </div>
    <!-- Method overview: heading, block diagram, and two explanatory paragraphs.
         Every opened <div> is closed exactly once; the previous version ended with an
         extra, unmatched </div>. -->
    <div class="container" style="max-width: 768px;">
        <div class="row">
            <div class="col-md-12">
                <h2>How does ZeroForge work?</h2>
            </div>
        </div>

        <div class="row">
            <img src="/ZeroForge/docs/assets/block_diagram_zeroconv.png" alt="Block diagram of the ZeroForge method">
        </div>

        <div class="row">
            <div class="col-md-12">
                <p>
                    Our method begins by taking a <a href="https://github.com/AutodeskAILab/Clip-Forge">CLIP-Forge</a> architecture which has been pre-trained on the ShapeNet dataset.
                    To introduce novel concepts that we don't have 3D data for, we formulate a training loss that encourages high similarity between the generator's text input and image renderings of the generator's 3D output.
                    This similarity score can be computed as an inner product in CLIP space by using CLIP's frozen image and text encoders.
                    By using a differentiable renderer, we are able to use this metric as a training signal to update CLIP-Forge's weights and expand its generative capabilities.
                </p>
                <p>
                    In addition to this, we also find that there are several other measures which must be taken to achieve good results.
                    To prevent mode collapse, we add an additional contrastive penalty which maintains diversity in outputs across different text queries.
                    One issue that can arise is the forgetting of the original shapes when training on new prompts outside ShapeNet.
                    We address this by augmenting the CLIP-Forge decoder with a locked copy of the original parameters to allow rapid adaptation while preserving existing concepts.
                </p>
            </div>
        </div>
    </div>
<hr class="divider" />
<!-- Citation: BibTeX entry for the paper. The container is closed explicitly here so the
     scripts that follow are not swallowed into it, and a single column is used instead of
     a column nested directly inside another column. -->
<div class="container" style="max-width: 768px;">
    <div class="row">
        <div class="col-md-12">
            <h2>Citation</h2>
            <!-- <code> is inline, so the <br> tags supply the line breaks of the BibTeX entry. -->
            <code>
                @misc{marshall2023zeroforge,<br>
                title={ZeroForge: Feedforward Text-to-Shape Without 3D Supervision}, <br>
                author={Kelly O. Marshall and Minh Pham and Ameya Joshi and Anushrut Jignasu <br> and Aditya Balu and Adarsh Krishnamurthy and Chinmay Hegde},<br>
                year={2023},<br>
                eprint={2306.08183}, <br>
                archivePrefix={arXiv}, <br>
                primaryClass={cs.CV} <br>
                }
            </code>
        </div>
    </div>
</div>
    <!-- IntersectionObserver polyfill, served from Cloudflare's cdnjs mirror of the polyfill
         service. The polyfill.io domain changed ownership and was found serving malicious
         code in 2024, so it must not be loaded. -->
    <script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=IntersectionObserver"></script>
    <!-- Site copy of yall.js, initialised immediately below with observeChanges enabled. -->
    <script src="/assets/js/yall.js"></script>
    <script>
        yall({
            observeChanges: true
        });
    </script>
    <!-- NOTE(review): scripts.js is loaded before jQuery; if it uses `$` at load time it will
         fail. Confirm against the file before changing the order. -->
    <script src="/assets/js/scripts.js"></script>
    <!-- jQuery must precede the Bootstrap bundle, which depends on it. -->
    <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/4.5.0/js/bootstrap.bundle.min.js"></script>
    <script src="https://uploads-ssl.webflow.com/51e0d73d83d06baa7a00000f/js/webflow.fd002feec.js"></script>
    <!-- Import the component -->
</body>

</html>