From 11acf7101b72b67fde3094b9f624cf3db21f3393 Mon Sep 17 00:00:00 2001
From: "nweiz@google.com" <nweiz@google.com>
Date: Thu, 31 Jan 2013 22:39:53 +0000
Subject: [PATCH] Add a Pub validator for READMEs that are invalid utf-8.

BUG=7045

Review URL: https://codereview.chromium.org//12090081

git-svn-id: https://dart.googlecode.com/svn/branches/bleeding_edge@17948 260f80e4-7a28-3924-810f-c04153c831b5
---
 lib/src/io.dart                    | 24 ++++++++++++++++++
 lib/src/package.dart               | 22 +++++++++++++++++
 lib/src/validator.dart             |  4 ++-
 lib/src/validator/utf8_readme.dart | 39 ++++++++++++++++++++++++++++++
 test/test_pub.dart                 | 27 ++++++++++++++-------
 test/validator_test.dart           | 19 +++++++++++++++
 6 files changed, 125 insertions(+), 10 deletions(-)
 create mode 100644 lib/src/validator/utf8_readme.dart

diff --git a/lib/src/io.dart b/lib/src/io.dart
index d45e9298..8243a281 100644
--- a/lib/src/io.dart
+++ b/lib/src/io.dart
@@ -85,6 +85,16 @@ Future<String> readTextFile(file) {
       });
 }
 
+/// Reads the contents of the binary file [file], which can either be a [String]
+/// or a [File].
+List<int> readBinaryFile(file) {
+  var path = _getPath(file);
+  log.io("Reading binary file $path.");
+  var contents = new File(path).readAsBytesSync();
+  log.io("Read ${contents.length} bytes from $path.");
+  return contents;
+}
+
 /// Creates [file] (which can either be a [String] or a [File]), and writes
 /// [contents] to it. Completes when the file is written and closed.
 ///
@@ -109,6 +119,20 @@ Future<File> writeTextFile(file, String contents, {dontLogContents: false}) {
   });
 }
 
+/// Creates [file] (which can either be a [String] or a [File]), and writes
+/// [contents] to it.
+File writeBinaryFile(file, List<int> contents) {
+  var path = _getPath(file);
+  file = new File(path);
+
+  log.io("Writing ${contents.length} bytes to binary file $path.");
+  file.openSync(FileMode.WRITE)
+      ..writeListSync(contents, 0, contents.length)
+      ..closeSync();
+  log.fine("Wrote text file $path.");
+  return file;
+}
+
 /// Asynchronously deletes [file], which can be a [String] or a [File]. Returns
 /// a [Future] that completes when the deletion is done.
 Future<File> deleteFile(file) {
diff --git a/lib/src/package.dart b/lib/src/package.dart
index f62e2bec..15fcb97b 100644
--- a/lib/src/package.dart
+++ b/lib/src/package.dart
@@ -11,6 +11,8 @@ import 'source.dart';
 import 'source_registry.dart';
 import 'version.dart';
 
+final _README_REGEXP = new RegExp(r"^README($|\.)", caseSensitive: false);
+
 /// A named, versioned, unit of code and resource reuse.
 class Package {
   /// Loads the package whose root directory is [packageDir]. [name] is the
@@ -58,6 +60,26 @@ class Package {
   /// specified in the pubspec when this package depends on another.
   Collection<PackageRef> get dependencies => pubspec.dependencies;
 
+  /// Returns the path to the README file at the root of the entrypoint, or null
+  /// if no README file is found. If multiple READMEs are found, this uses the
+  /// same conventions as pub.dartlang.org for choosing the primary one: the
+  /// README with the fewest extensions that is lexically ordered first is
+  /// chosen.
+  Future<String> get readmePath {
+    return listDir(dir).then((entries) {
+      var readmes = entries.where((entry) => entry.contains(_README_REGEXP));
+      if (readmes.isEmpty) return;
+
+      return readmes.min((readme1, readme2) {
+        var extensions1 = ".".allMatches(readme1).length;
+        var extensions2 = ".".allMatches(readme2).length;
+        var comparison = extensions1.compareTo(extensions2);
+        if (comparison != 0) return comparison;
+        return readme1.compareTo(readme2);
+      });
+    });
+  }
+
   /// Constructs a package with the given pubspec. The package will have no
   /// directory associated with it.
   Package.inMemory(this.pubspec)
diff --git a/lib/src/validator.dart b/lib/src/validator.dart
index 276a4af1..2dd5420f 100644
--- a/lib/src/validator.dart
+++ b/lib/src/validator.dart
@@ -18,6 +18,7 @@ import 'validator/lib.dart';
 import 'validator/license.dart';
 import 'validator/name.dart';
 import 'validator/pubspec_field.dart';
+import 'validator/utf8_readme.dart';
 
 /// The base class for validators that check whether a package is fit for
 /// uploading. Each validator should override [errors], [warnings], or both to
@@ -52,7 +53,8 @@ abstract class Validator {
       new PubspecFieldValidator(entrypoint),
       new DependencyValidator(entrypoint),
       new DirectoryValidator(entrypoint),
-      new CompiledDartdocValidator(entrypoint)
+      new CompiledDartdocValidator(entrypoint),
+      new Utf8ReadmeValidator(entrypoint)
     ];
 
     return Future.wait(validators.map((validator) => validator.validate()))
diff --git a/lib/src/validator/utf8_readme.dart b/lib/src/validator/utf8_readme.dart
new file mode 100644
index 00000000..e5862305
--- /dev/null
+++ b/lib/src/validator/utf8_readme.dart
@@ -0,0 +1,39 @@
+// Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+library utf8_readme_validator;
+
+import 'dart:async';
+import 'dart:utf';
+
+import '../../../pkg/path/lib/path.dart' as path;
+
+import '../entrypoint.dart';
+import '../io.dart';
+import '../utils.dart';
+import '../validator.dart';
+
+/// Validates that a package's README is valid utf-8.
+class Utf8ReadmeValidator extends Validator {
+  Utf8ReadmeValidator(Entrypoint entrypoint)
+    : super(entrypoint);
+
+  Future validate() {
+    return entrypoint.root.readmePath.then((readme) {
+      if (readme == null) return;
+      var bytes = readBinaryFile(readme);
+      try {
+        // The second and third arguments here are the default values. The
+        // fourth tells [decodeUtf8] to throw an ArgumentError if `bytes` isn't
+        // valid utf-8.
+        decodeUtf8(bytes, 0, null, null);
+      } on ArgumentError catch (_) {
+        warnings.add("$readme contains invalid UTF-8.\n"
+            "This will cause it to be displayed incorrectly on "
+                "pub.dartlang.org.");
+      }
+    });
+  }
+}
+
diff --git a/test/test_pub.dart b/test/test_pub.dart
index eeaddc96..27708ff1 100644
--- a/test/test_pub.dart
+++ b/test/test_pub.dart
@@ -14,6 +14,7 @@ import 'dart:io';
 import 'dart:json' as json;
 import 'dart:math';
 import 'dart:uri';
+import 'dart:utf';
 
 import '../../../pkg/http/lib/testing.dart';
 import '../../../pkg/oauth2/lib/oauth2.dart' as oauth2;
@@ -49,6 +50,10 @@ initConfig() {
 FileDescriptor file(Pattern name, String contents) =>
     new FileDescriptor(name, contents);
 
+/// Creates a new [FileDescriptor] with [name] and [contents].
+FileDescriptor binaryFile(Pattern name, List<int> contents) =>
+    new FileDescriptor.bytes(name, contents);
+
 /// Creates a new [DirectoryDescriptor] with [name] and [contents].
 DirectoryDescriptor dir(Pattern name, [List<Descriptor> contents]) =>
     new DirectoryDescriptor(name, contents);
@@ -866,16 +871,20 @@ abstract class Descriptor {
 /// tree before running a test, and for validating that the file system matches
 /// some expectations after running it.
 class FileDescriptor extends Descriptor {
-  /// The text contents of the file.
-  final String contents;
+  /// The contents of the file, in bytes.
+  final List<int> contents;
+
+  String get textContents => new String.fromCharCodes(contents);
 
-  FileDescriptor(Pattern name, this.contents) : super(name);
+  FileDescriptor.bytes(Pattern name, this.contents) : super(name);
+
+  FileDescriptor(Pattern name, String contents) :
+      this.bytes(name, encodeUtf8(contents));
 
   /// Creates the file within [dir]. Returns a [Future] that is completed after
   /// the creation is done.
-  Future<File> create(dir) {
-    return writeTextFile(join(dir, _stringName), contents);
-  }
+  Future<File> create(dir) => new Future.immediate(null).then((_) =>
+      writeBinaryFile(join(dir, _stringName), contents));
 
   /// Deletes the file within [dir]. Returns a [Future] that is completed after
   /// the deletion is done.
@@ -887,10 +896,10 @@ class FileDescriptor extends Descriptor {
   Future validate(String path) {
     return _validateOneMatch(path, (file) {
       return readTextFile(file).then((text) {
-        if (text == contents) return null;
+        if (text == textContents) return null;
 
         throw new ExpectException(
-            'File $file should contain:\n\n$contents\n\n'
+            'File $file should contain:\n\n$textContents\n\n'
             'but contained:\n\n$text');
       });
     });
@@ -904,7 +913,7 @@ class FileDescriptor extends Descriptor {
     }
 
     var stream = new ListInputStream();
-    stream.write(contents.charCodes);
+    stream.write(contents);
     stream.markEndOfStream();
     return stream;
   }
diff --git a/test/validator_test.dart b/test/validator_test.dart
index cc89c62a..bc4e30e4 100644
--- a/test/validator_test.dart
+++ b/test/validator_test.dart
@@ -22,6 +22,7 @@ import '../../pub/validator/lib.dart';
 import '../../pub/validator/license.dart';
 import '../../pub/validator/name.dart';
 import '../../pub/validator/pubspec_field.dart';
+import '../../pub/validator/utf8_readme.dart';
 
 void expectNoValidationError(ValidatorCreator fn) {
   expectLater(schedulePackageValidation(fn), pairOf(isEmpty, isEmpty));
@@ -53,6 +54,9 @@ Validator name(Entrypoint entrypoint) => new NameValidator(entrypoint);
 Validator pubspecField(Entrypoint entrypoint) =>
   new PubspecFieldValidator(entrypoint);
 
+Validator utf8Readme(Entrypoint entrypoint) =>
+  new Utf8ReadmeValidator(entrypoint);
+
 void scheduleNormalPackage() => normalPackage.scheduleCreate();
 
 main() {
@@ -141,6 +145,14 @@ main() {
       ]).scheduleCreate();
       expectNoValidationError(compiledDartdoc);
     });
+
+    integration('has a non-primary readme with invalid utf-8', () {
+      dir(appPath, [
+        file("README", "Valid utf-8"),
+        binaryFile("README.invalid", [192])
+      ]).scheduleCreate();
+      expectNoValidationError(utf8Readme);
+    });
   });
 
   group('should consider a package invalid if it', () {
@@ -550,5 +562,12 @@ main() {
 
       expectValidationWarning(compiledDartdoc);
     });
+
+    test('has a README with invalid utf-8', () {
+      dir(appPath, [
+        binaryFile("README", [192])
+      ]).scheduleCreate();
+      expectValidationWarning(utf8Readme);
+    });
   });
 }
-- 
GitLab