001    /* URLStreamHandler.java -- Abstract superclass for all protocol handlers
002       Copyright (C) 1998, 1999, 2002, 2003, 2004 Free Software Foundation, Inc.
003    
004    This file is part of GNU Classpath.
005    
006    GNU Classpath is free software; you can redistribute it and/or modify
007    it under the terms of the GNU General Public License as published by
008    the Free Software Foundation; either version 2, or (at your option)
009    any later version.
010    
011    GNU Classpath is distributed in the hope that it will be useful, but
012    WITHOUT ANY WARRANTY; without even the implied warranty of
013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014    General Public License for more details.
015    
016    You should have received a copy of the GNU General Public License
017    along with GNU Classpath; see the file COPYING.  If not, write to the
018    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
019    02110-1301 USA.
020    
021    Linking this library statically or dynamically with other modules is
022    making a combined work based on this library.  Thus, the terms and
023    conditions of the GNU General Public License cover the whole
024    combination.
025    
026    As a special exception, the copyright holders of this library give you
027    permission to link this library with independent modules to produce an
028    executable, regardless of the license terms of these independent
029    modules, and to copy and distribute the resulting executable under
030    terms of your choice, provided that you also meet, for each linked
031    independent module, the terms and conditions of the license of that
032    module.  An independent module is a module which is not derived from
033    or based on this library.  If you modify this library, you may extend
034    this exception to your version of the library, but you are not
035    obligated to do so.  If you do not wish to do so, delete this
036    exception statement from your version. */
037    
038    package java.net;
039    
040    import java.io.File;
041    import java.io.IOException;
042    
043    
044    /*
045     * Written using on-line Java Platform 1.2 API Specification, as well
046     * as "The Java Class Libraries", 2nd edition (Addison-Wesley, 1998).
047     * Status:  Believed complete and correct.
048     */
049    
050    /**
051     * This class is the superclass of all URL protocol handlers.  The URL
052     * class loads the appropriate protocol handler to establish a connection
053     * to a (possibly) remote service (eg, "http", "ftp") and to do protocol
054     * specific parsing of URL's.  Refer to the URL class documentation for
055     * details on how that class locates and loads protocol handlers.
056     * <p>
057     * A protocol handler implementation should override the openConnection()
058     * method, and optionally override the parseURL() and toExternalForm()
059     * methods if necessary. (The default implementations will parse/write all
060     * URL's in the same form as http URL's).  A protocol  specific subclass
061     * of URLConnection will most likely need to be created as well.
062     * <p>
063     * Note that the instance methods in this class are called as if they
064     * were static methods.  That is, a URL object to act on is passed with
065     * every call rather than the caller assuming the URL is stored in an
066     * instance variable of the "this" object.
067     * <p>
068     * The methods in this class are protected and accessible only to subclasses.
069     * URLStreamConnection objects are intended for use by the URL class only,
070     * not by other classes (unless those classes are implementing protocols).
071     *
072     * @author Aaron M. Renn (arenn@urbanophile.com)
073     * @author Warren Levy (warrenl@cygnus.com)
074     *
075     * @see URL
076     */
077    public abstract class URLStreamHandler
078    {
079      /**
080       * Creates a URLStreamHander
081       */
082      public URLStreamHandler()
083      {
084      }
085    
086      /**
087       * Returns a URLConnection for the passed in URL.  Note that this should
088       * not actually create the connection to the (possibly) remote host, but
089       * rather simply return a URLConnection object.  The connect() method of
090       * URL connection is used to establish the actual connection, possibly
091       * after the caller sets up various connection options.
092       *
093       * @param url The URL to get a connection object for
094       *
095       * @return A URLConnection object for the given URL
096       *
097       * @exception IOException If an error occurs
098       */
099      protected abstract URLConnection openConnection(URL url)
100        throws IOException;
101    
102      /**
103       * This method parses the string passed in as a URL and set's the
104       * instance data fields in the URL object passed in to the various values
105       * parsed out of the string.  The start parameter is the position to start
106       * scanning the string.  This is usually the position after the ":" which
107       * terminates the protocol name.  The end parameter is the position to
108       * stop scanning.  This will be either the end of the String, or the
109       * position of the "#" character, which separates the "file" portion of
110       * the URL from the "anchor" portion.
111       * <p>
112       * This method assumes URL's are formatted like http protocol URL's, so
113       * subclasses that implement protocols with URL's the follow a different
114       * syntax should override this method.  The lone exception is that if
115       * the protocol name set in the URL is "file", this method will accept
116       * an empty hostname (i.e., "file:///"), which is legal for that protocol
117       *
118       * @param url The URL object in which to store the results
119       * @param spec The String-ized URL to parse
120       * @param start The position in the string to start scanning from
121       * @param end The position in the string to stop scanning
122       */
123      protected void parseURL(URL url, String spec, int start, int end)
124      {
125        String host = url.getHost();
126        int port = url.getPort();
127        String file = url.getFile();
128        String ref = url.getRef();
129        String userInfo = url.getUserInfo();
130        String authority = url.getAuthority();
131        String query = null;
132        
133        // On Windows we need to change \ to / for file URLs
134        char separator = File.separatorChar;
135        if (url.getProtocol().equals("file") && separator != '/')
136          {
137            file = file.replace(separator, '/');
138            spec = spec.replace(separator, '/');
139          }
140    
141        if (spec.regionMatches(start, "//", 0, 2))
142          {
143            String genuineHost;
144            int hostEnd;
145            int colon;
146            int at_host;
147    
148            start += 2;
149            int slash = spec.indexOf('/', start);
150            if (slash >= 0)
151              hostEnd = slash;
152            else
153              hostEnd = end;
154    
155            authority = host = spec.substring(start, hostEnd);
156    
157            // We first need a genuine host name (with userinfo).
158            // So we check for '@': if it's present check the port in the
159            // section after '@' in the other case check it in the full string.
160            // P.S.: We don't care having '@' at the beginning of the string.
161            if ((at_host = host.indexOf('@')) >= 0)
162              {
163                genuineHost = host.substring(at_host);
164                userInfo = host.substring(0, at_host);
165              }
166            else
167              genuineHost = host;
168    
169            // Look for optional port number.  It is valid for the non-port
170            // part of the host name to be null (e.g. a URL "http://:80").
171            // TBD: JDK 1.2 in this case sets host to null rather than "";
172            // this is undocumented and likely an unintended side effect in 1.2
173            // so we'll be simple here and stick with "". Note that
174            // "http://" or "http:///" produce a "" host in JDK 1.2.
175            if ((colon = genuineHost.indexOf(':')) >= 0)
176              {
177                try
178                  {
179                    port = Integer.parseInt(genuineHost.substring(colon + 1));
180                  }
181                catch (NumberFormatException e)
182                  {
183                    // Ignore invalid port values; port is already set to u's
184                    // port.
185                  }
186    
187                // Now we must cut the port number in the original string.
188                if (at_host >= 0)
189                  host = host.substring(0, at_host + colon);
190                else
191                  host = host.substring(0, colon);
192              }
193            file = null;
194            start = hostEnd;
195          }
196        else if (host == null)
197          host = "";
198    
199        if (file == null || file.length() == 0
200            || (start < end && spec.charAt(start) == '/'))
201          {
202            // No file context available; just spec for file.
203            // Or this is an absolute path name; ignore any file context.
204            file = spec.substring(start, end);
205            ref = null;
206          }
207        else if (start < end)
208          {
209            // Context is available, but only override it if there is a new file.
210            int lastSlash = file.lastIndexOf('/');
211            if (lastSlash < 0)
212              file = spec.substring(start, end);
213            else
214              file = (file.substring(0, lastSlash)
215                      + '/' + spec.substring(start, end));
216    
217            // For URLs constructed relative to a context, we
218            // need to canonicalise the file path.
219            file = canonicalizeFilename(file);
220    
221            ref = null;
222          }
223    
224        if (ref == null)
225          {
226            // Normally there should be no '#' in the file part,
227            // but we are nice.
228            int hash = file.indexOf('#');
229            if (hash != -1)
230              {
231                ref = file.substring(hash + 1, file.length());
232                file = file.substring(0, hash);
233              }
234          }
235    
236        // We care about the query tag only if there is no reference at all.
237        if (ref == null)
238          {
239              int queryTag = file.indexOf('?');
240              if (queryTag != -1)
241                {
242                  query = file.substring(queryTag + 1);
243                  file = file.substring(0, queryTag);
244                }
245          }
246    
247        // XXX - Classpath used to call PlatformHelper.toCanonicalForm() on
248        // the file part. It seems like overhead, but supposedly there is some
249        // benefit in windows based systems (it also lowercased the string).
250        setURL(url, url.getProtocol(), host, port, authority, userInfo, file, query, ref);
251      }
252    
253      /*
254       * Canonicalize a filename.
255       */
256      private static String canonicalizeFilename(String file)
257      {
258        // XXX - GNU Classpath has an implementation that might be more appropriate
259        // for Windows based systems (gnu.java.io.PlatformHelper.toCanonicalForm)
260        int index;
261    
262        // Replace "/./" with "/".  This probably isn't very efficient in
263        // the general case, but it's probably not bad most of the time.
264        while ((index = file.indexOf("/./")) >= 0)
265          file = file.substring(0, index) + file.substring(index + 2);
266    
267        // Process "/../" correctly.  This probably isn't very efficient in
268        // the general case, but it's probably not bad most of the time.
269        while ((index = file.indexOf("/../")) >= 0)
270          {
271            // Strip of the previous directory - if it exists.
272            int previous = file.lastIndexOf('/', index - 1);
273            if (previous >= 0)
274              file = file.substring(0, previous) + file.substring(index + 3);
275            else
276              break;
277          }
278        return file;
279      }
280    
281      /**
282       * Compares two URLs, excluding the fragment component
283       *
284       * @param url1 The first url
285       * @param url2 The second url to compare with the first
286       *
287       * @return True if both URLs point to the same file, false otherwise.
288       *
289       * @specnote Now protected
290       */
291      protected boolean sameFile(URL url1, URL url2)
292      {
293        if (url1 == url2)
294          return true;
295    
296        // This comparison is very conservative.  It assumes that any
297        // field can be null.
298        if (url1 == null || url2 == null)
299          return false;
300        int p1 = url1.getPort();
301        if (p1 == -1)
302          p1 = url1.ph.getDefaultPort();
303        int p2 = url2.getPort();
304        if (p2 == -1)
305          p2 = url2.ph.getDefaultPort();
306        if (p1 != p2)
307          return false;
308        String s1;
309        String s2;
310        s1 = url1.getProtocol();
311        s2 = url2.getProtocol();
312        if (s1 != s2 && (s1 == null || ! s1.equals(s2)))
313          return false;
314        s1 = url1.getHost();
315        s2 = url2.getHost();
316        if (s1 != s2 && (s1 == null || ! s1.equals(s2)))
317          return false;
318        s1 = canonicalizeFilename(url1.getFile());
319        s2 = canonicalizeFilename(url2.getFile());
320        if (s1 != s2 && (s1 == null || ! s1.equals(s2)))
321          return false;
322        return true;
323      }
324    
325      /**
326       * This methods sets the instance variables representing the various fields
327       * of the URL to the values passed in.
328       *
329       * @param u The URL to modify
330       * @param protocol The protocol to set
331       * @param host The host name to et
332       * @param port The port number to set
333       * @param file The filename to set
334       * @param ref The reference
335       *
336       * @exception SecurityException If the protocol handler of the URL is
337       * different from this one
338       *
339       * @deprecated 1.2 Please use
340       * #setURL(URL,String,String,int,String,String,String,String);
341       */
342      protected void setURL(URL u, String protocol, String host, int port,
343                            String file, String ref)
344      {
345        u.set(protocol, host, port, file, ref);
346      }
347    
348      /**
349       * Sets the fields of the URL argument to the indicated values
350       *
351       * @param u The URL to modify
352       * @param protocol The protocol to set
353       * @param host The host name to set
354       * @param port The port number to set
355       * @param authority The authority to set
356       * @param userInfo The user information to set
357       * @param path The path/filename to set
358       * @param query The query part to set
359       * @param ref The reference
360       *
361       * @exception SecurityException If the protocol handler of the URL is
362       * different from this one
363       */
364      protected void setURL(URL u, String protocol, String host, int port,
365                            String authority, String userInfo, String path,
366                            String query, String ref)
367      {
368        u.set(protocol, host, port, authority, userInfo, path, query, ref);
369      }
370    
371      /**
372       * This is the default method for computing whether two URLs are
373       * equivalent.  This method assumes that neither URL is null.
374       *
375       * @param url1 An URL object
376       * @param url2 Another URL object
377       *
378       * @return True if both given URLs are equal, false otherwise.
379       */
380      protected boolean equals(URL url1, URL url2)
381      {
382        // This comparison is very conservative.  It assumes that any
383        // field can be null.
384        int port1 = url1.getPort();
385        if (port1 == -1)
386          port1 = url1.getDefaultPort();
387        int port2 = url2.getPort();
388        if (port2 == -1)
389          port2 = url2.getDefaultPort();
390        // Note that we don't bother checking the 'authority'; it is
391        // redundant.
392        return (port1 == port2
393               && ((url1.getProtocol() == null && url2.getProtocol() == null)
394               || (url1.getProtocol() != null
395               && url1.getProtocol().equals(url2.getProtocol())))
396               && ((url1.getUserInfo() == null && url2.getUserInfo() == null)
397               || (url1.getUserInfo() != null
398               && url1.getUserInfo().equals(url2.getUserInfo())))
399               && ((url1.getHost() == null && url2.getHost() == null)
400               || (url1.getHost() != null && url1.getHost().equals(url2.getHost())))
401               && ((url1.getPath() == null && url2.getPath() == null)
402               || (url1.getPath() != null && url1.getPath().equals(url2.getPath())))
403               && ((url1.getQuery() == null && url2.getQuery() == null)
404               || (url1.getQuery() != null
405               && url1.getQuery().equals(url2.getQuery())))
406               && ((url1.getRef() == null && url2.getRef() == null)
407               || (url1.getRef() != null && url1.getRef().equals(url2.getRef()))));
408      }
409    
410      /**
411       * Compares the host components of two URLs.
412       *
413       * @param url1 The first URL.
414       * @param url2 The second URL.
415       *
416       * @return True if both URLs contain the same host.
417       */
418      protected boolean hostsEqual(URL url1, URL url2)
419      {
420        InetAddress addr1 = getHostAddress(url1);
421        InetAddress addr2 = getHostAddress(url2);
422    
423        if (addr1 != null && addr2 != null)
424          return addr1.equals(addr2);
425    
426        String host1 = url1.getHost();
427        String host2 = url2.getHost();
428    
429        if (host1 != null && host2 != null)
430          return host1.equalsIgnoreCase(host2);
431    
432        return host1 == null && host2 == null;
433      }
434    
435      /**
436       * Get the IP address of our host. An empty host field or a DNS failure will
437       * result in a null return.
438       *
439       * @param url The URL to return the host address for.
440       *
441       * @return The address of the hostname in url.
442       */
443      protected InetAddress getHostAddress(URL url)
444      {
445        String hostname = url.getHost();
446    
447        if (hostname.equals(""))
448          return null;
449    
450        try
451          {
452            return InetAddress.getByName(hostname);
453          }
454        catch (UnknownHostException e)
455          {
456            return null;
457          }
458      }
459    
460      /**
461       * Returns the default port for a URL parsed by this handler. This method is
462       * meant to be overidden by handlers with default port numbers.
463       *
464       * @return The default port number.
465       */
466      protected int getDefaultPort()
467      {
468        return -1;
469      }
470    
471      /**
472       * Provides the default hash calculation. May be overidden by handlers for
473       * other protocols that have different requirements for hashCode calculation.
474       *
475       * @param url The URL to calc the hashcode for.
476       *
477       * @return The hashcode for the given URL.
478       */
479      protected int hashCode(URL url)
480      {
481        return url.getProtocol().hashCode()
482               + ((url.getHost() == null) ? 0 : url.getHost().hashCode())
483               + url.getFile().hashCode() + url.getPort();
484      }
485    
486      /**
487       * This method converts a URL object into a String.  This method creates
488       * Strings in the mold of http URL's, so protocol handlers which use URL's
489       * that have a different syntax should override this method
490       *
491       * @param url The URL object to convert
492       *
493       * @return A string representation of the url
494       */
495      protected String toExternalForm(URL url)
496      {
497        String protocol;
498        String file;
499        String ref;
500        String authority;
501    
502        protocol = url.getProtocol();
503        authority = url.getAuthority();
504        if (authority == null)
505          authority = "";
506        
507        file = url.getFile();
508        ref = url.getRef();
509    
510        // Guess a reasonable size for the string buffer so we have to resize
511        // at most once.
512        int size = protocol.length() + authority.length() + file.length() + 24;
513        StringBuffer sb = new StringBuffer(size);
514    
515        if (protocol.length() > 0)
516          {
517            sb.append(protocol);
518            sb.append(":");
519          }
520        
521        // If we have superfluous leading slashes (that means, at least 2)
522        // we always add the authority component ("//" + host) to
523        // avoid ambiguity. Otherwise we would generate an URL like
524        // proto://home/foo
525        // where we meant: 
526        // host: <empty> - file: //home/foo
527        // but URL spec says it is:
528        // host: home - file: /foo
529        if (authority.length() != 0 || file.startsWith("//") )
530          sb.append("//").append(authority).append(file);
531        else
532          sb.append(file);
533    
534        if (ref != null)
535          sb.append('#').append(ref);
536    
537        return sb.toString();
538      }
539    }